diff --git "a/profile_trace/iteration_22528/rank2_trace.json" "b/profile_trace/iteration_22528/rank2_trace.json" new file mode 100644--- /dev/null +++ "b/profile_trace/iteration_22528/rank2_trace.json" @@ -0,0 +1,157273 @@ + +{ + "schemaVersion": 1, + "deviceProperties": [ + { + "id": 0, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 1, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 2, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 3, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 4, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 5, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 6, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 7, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + } + ], + "cupti_version": 22, + "cuda_runtime_version": 12040, + "cuda_driver_version": 12080, + "distributedInfo": {"backend": "nccl", "rank": 2, "world_size": 8, "pg_count": 1, "pg_config": [{"pg_name": "0", "pg_desc": "default_pg", "backend_config": "cuda:nccl", "pg_size": 8, "ranks": [0, 1, 2, 3, 4, 5, 6, 7]}], "nccl_version": "2.21.5"}, + "record_shapes": 1, + "trace_id": "AF0055C445624F3C9D525E14201E2812", + "traceEvents": [ + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937191782.543, "dur": 133.218, + "args": { + "External id": 977409,"Record function id": 0, "Sequence number": 10552468, "Fwd thread id": 1, "Ev Idx": 0 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937191802.653, "dur": 102.525, + "args": { + "External id": 977410,"Sequence number": 10552468, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 1 + } + }, + { + "ph": "f", "id": 1, "pid": 2338708, "tid": 2379421, "ts": 6345937191802.653, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338708, "tid": 2379421, + "ts": 6345937191812.231, "dur": 90.467, + "args": { + "External id": 977411,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 2 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937191928.505, "dur": 343.473, + "args": { + "External id": 977412,"Record function id": 0, "Ev Idx": 3 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937191993.631, "dur": 182.294, + "args": { + "External id": 977413,"Record function id": 0, "Ev Idx": 4 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2338708, "tid": 2379421, + "ts": 6345937192045.846, "dur": 114.600, + "args": { + "External id": 977414,"Record function id": 0, "Ev Idx": 5 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937192183.008, "dur": 2.790, + "args": { + "External id": 977415,"Sequence number": 10552467, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 6 + } + }, + { + "ph": "f", "id": 2, "pid": 2338708, "tid": 2379421, "ts": 6345937192183.008, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937192191.883, "dur": 73.125, + "args": { + "External id": 977416,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 7 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937192202.739, "dur": 61.498, + "args": { + "External id": 977417,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 8 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937192217.112, "dur": 4.468, + "args": { + "External id": 977418,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937192285.083, "dur": 36144.224, + "args": { + "External id": 977419,"Record function id": 0, "Sequence number": 10552465, "Fwd thread id": 1, "Ev Idx": 10 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937192287.077, "dur": 36123.356, + "args": { + "External id": 977420,"Sequence number": 10552465, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 11 + } + }, + { + "ph": "f", "id": 3, "pid": 2338708, "tid": 2379421, "ts": 6345937192287.077, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937192337.053, "dur": 4.682, + "args": { + "External id": 977421,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937192348.759, "dur": 35744.094, + "args": { + "External id": 977422,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937192350.997, "dur": 35741.329, + "args": { + "External id": 977423,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 14 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937192357.186, "dur": 7.935, + "args": { + "External id": 977424,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937192367.458, "dur": 35723.253, + "args": { + "External id": 977425,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338708, "tid": 2379421, + "ts": 6345937228098.779, "dur": 0.466, + "args": { + "External id": 977426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 17 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228101.396, "dur": 3.199, + "args": { + "External id": 977427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 18 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228103.314, "dur": 1.115, + "args": { + "External id": 977428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2379421, + "ts": 6345937228111.423, "dur": 33.570, + "args": { + "External id": 977429,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338708, "tid": 2379421, + "ts": 6345937228154.619, "dur": 50.625, + "args": { + "External id": 977430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2379421, + "ts": 6345937228156.402, "dur": 48.624, + "args": { + "External id": 977431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 22 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2379421, + "ts": 6345937228158.105, "dur": 46.434, + "args": { + "External id": 977432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 23 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937228445.463, "dur": 22.954, + "args": { + "External id": 977433,"Record function id": 0, "Sequence number": 10552464, "Fwd thread id": 1, "Ev Idx": 24 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937228447.898, "dur": 17.124, + "args": { + "External id": 977434,"Sequence number": 10552464, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 25 + } + }, + { + "ph": "f", "id": 4, "pid": 2338708, "tid": 2379421, "ts": 6345937228447.898, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937228453.373, "dur": 11.348, + "args": { + "External id": 977435,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 26 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937228458.023, "dur": 6.427, + "args": { + "External id": 977436,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 27 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937228476.265, "dur": 128.272, + "args": { + "External id": 977437,"Record function id": 0, "Sequence number": 10552463, "Fwd thread id": 1, "Ev Idx": 28 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937228477.399, "dur": 118.132, + "args": { + "External id": 977438,"Sequence number": 10552463, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 29 + } + }, + { + "ph": "f", "id": 5, "pid": 2338708, "tid": 2379421, "ts": 6345937228477.399, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937228481.705, "dur": 112.967, + "args": { + "External id": 977439,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 30 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937228488.781, "dur": 44.669, + "args": { + "External id": 977440,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 31 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937228493.934, "dur": 7.743, + "args": { + "External id": 977441,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 32 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228503.621, "dur": 29.467, + "args": { + "External id": 977442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 33 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228510.004, "dur": 22.576, + "args": { + "External id": 977443,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 34 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937228536.469, "dur": 8.282, + "args": { + "External id": 977444,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 35 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937228539.632, "dur": 4.624, + "args": { + "External id": 977445,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 36 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228546.240, "dur": 47.429, + "args": { + "External id": 977446,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 37 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937228610.096, "dur": 74.105, + "args": { + "External id": 977447,"Record function id": 0, "Sequence number": 10552462, "Fwd thread id": 1, "Ev Idx": 38 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937228611.361, "dur": 68.676, + "args": { + "External id": 977448,"Sequence number": 10552462, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 39 + } + }, + { + "ph": "f", "id": 6, "pid": 2338708, "tid": 2379421, "ts": 6345937228611.361, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937228615.342, "dur": 64.208, + "args": { + "External id": 977449,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 40 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937228621.803, "dur": 24.320, + "args": { + "External id": 977450,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 41 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937228622.824, "dur": 3.657, + "args": { + "External id": 977451,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 42 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228627.528, "dur": 18.271, + "args": { + "External id": 977452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 43 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228631.726, "dur": 13.565, + "args": { + "External id": 977453,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 44 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345937228648.017, "dur": 9.261, + "args": { + "External id": 977454,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 45 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937228655.326, "dur": 1.217, + "args": { + "External id": 977455,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 46 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228658.107, "dur": 20.875, + "args": { + "External id": 977456,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 47 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937228688.594, "dur": 223.821, + "args": { + "External id": 977457,"Record function id": 0, "Sequence number": 10552461, "Fwd thread id": 1, "Ev Idx": 48 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937228690.037, "dur": 217.155, + "args": { + "External id": 977458,"Sequence number": 10552461, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 49 + } + }, + { + "ph": "f", "id": 7, "pid": 2338708, "tid": 2379421, "ts": 6345937228690.037, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937228694.288, "dur": 212.593, + "args": { + "External id": 977459,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 50 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937228697.330, "dur": 22.162, + "args": { + "External id": 977460,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 51 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937228698.201, "dur": 3.250, + "args": { + "External id": 977461,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 52 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228702.290, "dur": 16.867, + "args": { + "External id": 977462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 53 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228703.411, "dur": 15.282, + "args": { + "External id": 977463,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 54 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937228720.827, "dur": 2.749, + "args": { + "External id": 977464,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 55 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937228722.259, "dur": 1.073, + "args": { + "External id": 977465,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 56 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228726.801, "dur": 178.453, + "args": { + "External id": 977466,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 57 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937228917.879, "dur": 113.864, + "args": { + "External id": 977467,"Record function id": 0, "Sequence number": 10552460, "Fwd thread id": 1, "Ev Idx": 58 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937228919.061, "dur": 86.825, + "args": { + "External id": 977468,"Sequence number": 10552460, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 59 + } + }, + { + "ph": "f", "id": 8, "pid": 2338708, "tid": 2379421, "ts": 6345937228919.061, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937228920.970, "dur": 84.544, + "args": { + "External id": 977469,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 60 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937228924.511, "dur": 17.681, + "args": { + "External id": 977470,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 61 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937228925.560, "dur": 2.580, + "args": { + "External id": 977471,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 62 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228928.846, "dur": 13.031, + "args": { + "External id": 977472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 63 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228929.951, "dur": 11.547, + "args": { + "External id": 977473,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 64 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937228943.245, "dur": 4.285, + "args": { + "External id": 977474,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 65 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937228946.631, "dur": 0.697, + "args": { + "External id": 977475,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 66 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937228948.348, "dur": 56.049, + "args": { + "External id": 977476,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 67 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937229043.016, "dur": 90.097, + "args": { + "External id": 977477,"Record function id": 0, "Sequence number": 10552459, "Fwd thread id": 1, "Ev Idx": 68 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937229045.582, "dur": 1.185, + "args": { + "External id": 977478,"Sequence number": 10552459, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 69 + } + }, + { + "ph": "f", "id": 9, "pid": 2338708, "tid": 2379421, "ts": 6345937229045.582, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937229050.457, "dur": 77.588, + "args": { + "External id": 977479,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 70 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937229089.727, "dur": 37.768, + "args": { + "External id": 977480,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 71 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937229103.149, "dur": 1.027, + "args": { + "External id": 977481,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 72 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937229141.654, "dur": 2361.800, + "args": { + "External id": 977482,"Record function id": 0, "Sequence number": 10552457, "Fwd thread id": 1, "Ev Idx": 73 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937229143.502, "dur": 2319.313, + "args": { + "External id": 977483,"Sequence number": 10552457, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 74 + } + }, + { + "ph": "f", "id": 10, "pid": 2338708, "tid": 2379421, "ts": 6345937229143.502, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937229189.755, "dur": 3.106, + "args": { + "External id": 977484,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 75 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937229196.217, "dur": 2012.887, + "args": { + "External id": 977485,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 76 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937229198.259, "dur": 2010.353, + "args": { + "External id": 977486,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 77 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937229201.831, "dur": 7.681, + "args": { + "External id": 977487,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 78 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937229210.739, "dur": 1996.659, + "args": { + "External id": 977488,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 79 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338708, "tid": 2379421, + "ts": 6345937231213.467, "dur": 0.455, + "args": { + "External id": 977489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 80 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231215.692, "dur": 5.827, + "args": { + "External id": 977490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 81 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231220.273, "dur": 1.084, + "args": { + "External id": 977491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 82 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2379421, + "ts": 6345937231227.135, "dur": 26.233, + "args": { + "External id": 977492,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 83 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338708, "tid": 2379421, + "ts": 6345937231260.118, "dur": 45.915, + "args": { + "External id": 977493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 84 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2379421, + "ts": 6345937231261.814, "dur": 44.003, + "args": { + "External id": 977494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 85 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2379421, + "ts": 6345937231263.405, "dur": 42.067, + "args": { + "External id": 977495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 86 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231476.435, "dur": 21.263, + "args": { + "External id": 977496,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 87 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937231515.730, "dur": 18.656, + "args": { + "External id": 977497,"Record function id": 0, "Sequence number": 10552456, "Fwd thread id": 1, "Ev Idx": 88 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937231517.562, "dur": 13.554, + "args": { + "External id": 977498,"Sequence number": 10552456, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 89 + } + }, + { + "ph": "f", "id": 11, "pid": 2338708, "tid": 2379421, "ts": 6345937231517.562, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937231524.118, "dur": 6.717, + "args": { + "External id": 977499,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 90 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937231525.714, "dur": 4.924, + "args": { + "External id": 977500,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 91 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937231538.504, "dur": 91.040, + "args": { + "External id": 977501,"Record function id": 0, "Sequence number": 10552455, "Fwd thread id": 1, "Ev Idx": 92 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937231539.810, "dur": 82.988, + "args": { + "External id": 977502,"Sequence number": 10552455, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 93 + } + }, + { + "ph": "f", "id": 12, "pid": 2338708, "tid": 2379421, "ts": 6345937231539.810, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937231542.003, "dur": 80.235, + "args": { + "External id": 977503,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 94 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937231551.026, "dur": 24.273, + "args": { + "External id": 977504,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 95 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937231553.079, "dur": 4.014, + "args": { + "External id": 977505,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 96 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231558.308, "dur": 16.652, + "args": { + "External id": 977506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 97 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231560.115, "dur": 14.012, + "args": { + "External id": 977507,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 98 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937231577.426, "dur": 4.197, + "args": { + "External id": 977508,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 99 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937231579.747, "dur": 1.468, + "args": { + "External id": 977509,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231582.871, "dur": 38.095, + "args": { + "External id": 977510,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937231634.467, "dur": 66.489, + "args": { + "External id": 977511,"Record function id": 0, "Sequence number": 10552454, "Fwd thread id": 1, "Ev Idx": 102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937231636.025, "dur": 60.842, + "args": { + "External id": 977512,"Sequence number": 10552454, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 103 + } + }, + { + "ph": "f", "id": 13, "pid": 2338708, "tid": 2379421, "ts": 6345937231636.025, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937231641.495, "dur": 55.036, + "args": { + "External id": 977513,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937231643.542, "dur": 25.137, + "args": { + "External id": 977514,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937231644.886, "dur": 3.042, + "args": { + "External id": 977515,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231651.042, "dur": 17.269, + "args": { + "External id": 977516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231652.147, "dur": 15.733, + "args": { + "External id": 977517,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345937231670.161, "dur": 8.002, + "args": { + "External id": 977518,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937231676.520, "dur": 0.977, + "args": { + "External id": 977519,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231679.058, "dur": 16.822, + "args": { + "External id": 977520,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937231708.068, "dur": 141.146, + "args": { + "External id": 977521,"Record function id": 0, "Sequence number": 10552453, "Fwd thread id": 1, "Ev Idx": 112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937231709.200, "dur": 134.961, + "args": { + "External id": 977522,"Sequence number": 10552453, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 113 + } + }, + { + "ph": "f", "id": 14, "pid": 2338708, "tid": 2379421, "ts": 6345937231709.200, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937231711.207, "dur": 132.339, + "args": { + "External id": 977523,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937231712.352, "dur": 19.830, + "args": { + "External id": 977524,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937231713.256, "dur": 2.556, + "args": { + "External id": 977525,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231718.687, "dur": 13.155, + "args": { + "External id": 977526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231719.920, "dur": 11.460, + "args": { + "External id": 977527,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937231733.274, "dur": 5.133, + "args": { + "External id": 977528,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937231737.396, "dur": 0.806, + "args": { + "External id": 977529,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231739.187, "dur": 103.109, + "args": { + "External id": 977530,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937231854.513, "dur": 111.389, + "args": { + "External id": 977531,"Record function id": 0, "Sequence number": 10552452, "Fwd thread id": 1, "Ev Idx": 122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937231855.947, "dur": 89.909, + "args": { + "External id": 977532,"Sequence number": 10552452, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 123 + } + }, + { + "ph": "f", "id": 15, "pid": 2338708, "tid": 2379421, "ts": 6345937231855.947, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937231860.200, "dur": 85.280, + "args": { + "External id": 977533,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937231861.539, "dur": 20.298, + "args": { + "External id": 977534,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937231862.587, "dur": 2.570, + "args": { + "External id": 977535,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231866.119, "dur": 15.397, + "args": { + "External id": 977536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231869.643, "dur": 11.436, + "args": { + "External id": 977537,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937231883.047, "dur": 2.445, + "args": { + "External id": 977538,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937231884.636, "dur": 0.598, + "args": { + "External id": 977539,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231886.534, "dur": 57.873, + "args": { + "External id": 977540,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937231951.071, "dur": 13.114, + "args": { + "External id": 977541,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937231971.485, "dur": 61.149, + "args": { + "External id": 977542,"Record function id": 0, "Sequence number": 10552451, "Fwd thread id": 1, "Ev Idx": 133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937231972.745, "dur": 1.173, + "args": { + "External id": 977543,"Sequence number": 10552451, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 134 + } + }, + { + "ph": "f", "id": 16, "pid": 2338708, "tid": 2379421, "ts": 6345937231972.745, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937231976.905, "dur": 50.973, + "args": { + "External id": 977544,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937231979.782, "dur": 47.180, + "args": { + "External id": 977545,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937231991.703, "dur": 0.822, + "args": { + "External id": 977546,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937232041.130, "dur": 3420.390, + "args": { + "External id": 977547,"Record function id": 0, "Sequence number": 10552449, "Fwd thread id": 1, "Ev Idx": 138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937232045.243, "dur": 3378.644, + "args": { + "External id": 977548,"Sequence number": 10552449, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 139 + } + }, + { + "ph": "f", "id": 17, "pid": 2338708, "tid": 2379421, "ts": 6345937232045.243, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937232135.884, "dur": 3.645, + "args": { + "External id": 977549,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937232142.864, "dur": 3049.433, + "args": { + "External id": 977550,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937232144.877, "dur": 3047.017, + "args": { + "External id": 977551,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937232147.908, "dur": 7.536, + "args": { + "External id": 977552,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937232159.254, "dur": 3031.440, + "args": { + "External id": 977553,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338708, "tid": 2379421, + "ts": 6345937235197.065, "dur": 0.333, + "args": { + "External id": 977554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235198.810, "dur": 3.080, + "args": { + "External id": 977555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235200.460, "dur": 1.280, + "args": { + "External id": 977556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2379421, + "ts": 6345937235206.677, "dur": 24.187, + "args": { + "External id": 977557,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338708, "tid": 2379421, + "ts": 6345937235239.557, "dur": 42.944, + "args": { + "External id": 977558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2379421, + "ts": 6345937235241.129, "dur": 41.163, + "args": { + "External id": 977559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2379421, + "ts": 6345937235242.498, "dur": 39.421, + "args": { + "External id": 977560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235437.878, "dur": 18.627, + "args": { + "External id": 977561,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937235474.455, "dur": 18.732, + "args": { + "External id": 977562,"Record function id": 0, "Sequence number": 10552448, "Fwd thread id": 1, "Ev Idx": 153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937235479.336, "dur": 10.986, + "args": { + "External id": 977563,"Sequence number": 10552448, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 154 + } + }, + { + "ph": "f", "id": 18, "pid": 2338708, "tid": 2379421, "ts": 6345937235479.336, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937235483.331, "dur": 6.708, + "args": { + "External id": 977564,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937235484.932, "dur": 4.892, + "args": { + "External id": 977565,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937235497.340, "dur": 85.024, + "args": { + "External id": 977566,"Record function id": 0, "Sequence number": 10552447, "Fwd thread id": 1, "Ev Idx": 157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937235498.511, "dur": 77.387, + "args": { + "External id": 977567,"Sequence number": 10552447, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 158 + } + }, + { + "ph": "f", "id": 19, "pid": 2338708, "tid": 2379421, "ts": 6345937235498.511, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937235502.857, "dur": 72.507, + "args": { + "External id": 977568,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937235506.089, "dur": 25.141, + "args": { + "External id": 977569,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937235507.804, "dur": 4.184, + "args": { + "External id": 977570,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235513.614, "dur": 17.293, + "args": { + "External id": 977571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235515.886, "dur": 14.508, + "args": { + "External id": 977572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937235533.010, "dur": 6.797, + "args": { + "External id": 977573,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937235537.716, "dur": 1.619, + "args": { + "External id": 977574,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235543.224, "dur": 31.145, + "args": { + "External id": 977575,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937235587.119, "dur": 58.142, + "args": { + "External id": 977576,"Record function id": 0, "Sequence number": 10552446, "Fwd thread id": 1, "Ev Idx": 167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937235588.728, "dur": 53.161, + "args": { + "External id": 977577,"Sequence number": 10552446, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 168 + } + }, + { + "ph": "f", "id": 20, "pid": 2338708, "tid": 2379421, "ts": 6345937235588.728, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937235591.339, "dur": 50.169, + "args": { + "External id": 977578,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937235593.519, "dur": 18.205, + "args": { + "External id": 977579,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937235594.610, "dur": 3.065, + "args": { + "External id": 977580,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235598.432, "dur": 13.013, + "args": { + "External id": 977581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235599.375, "dur": 11.674, + "args": { + "External id": 977582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345937235613.429, "dur": 10.069, + "args": { + "External id": 977583,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937235621.889, "dur": 1.013, + "args": { + "External id": 977584,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235624.224, "dur": 16.476, + "args": { + "External id": 977585,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937235649.647, "dur": 134.758, + "args": { + "External id": 977586,"Record function id": 0, "Sequence number": 10552445, "Fwd thread id": 1, "Ev Idx": 177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937235650.887, "dur": 128.760, + "args": { + "External id": 977587,"Sequence number": 10552445, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 178 + } + }, + { + "ph": "f", "id": 21, "pid": 2338708, "tid": 2379421, "ts": 6345937235650.887, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937235652.691, "dur": 126.403, + "args": { + "External id": 977588,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937235654.204, "dur": 22.262, + "args": { + "External id": 977589,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937235655.019, "dur": 5.470, + "args": { + "External id": 977590,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235661.484, "dur": 14.636, + "args": { + "External id": 977591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235662.252, "dur": 13.295, + "args": { + "External id": 977592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937235677.794, "dur": 5.122, + "args": { + "External id": 977593,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937235681.871, "dur": 0.782, + "args": { + "External id": 977594,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235686.759, "dur": 91.250, + "args": { + "External id": 977595,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937235790.319, "dur": 115.420, + "args": { + "External id": 977596,"Record function id": 0, "Sequence number": 10552444, "Fwd thread id": 1, "Ev Idx": 187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937235791.604, "dur": 93.101, + "args": { + "External id": 977597,"Sequence number": 10552444, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 188 + } + }, + { + "ph": "f", "id": 22, "pid": 2338708, "tid": 2379421, "ts": 6345937235791.604, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937235795.334, "dur": 89.009, + "args": { + "External id": 977598,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937235796.527, "dur": 18.622, + "args": { + "External id": 977599,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937235797.428, "dur": 3.134, + "args": { + "External id": 977600,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235801.260, "dur": 13.565, + "args": { + "External id": 977601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235802.692, "dur": 11.594, + "args": { + "External id": 977602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937235816.527, "dur": 4.754, + "args": { + "External id": 977603,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937235820.232, "dur": 0.737, + "args": { + "External id": 977604,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235824.812, "dur": 58.569, + "args": { + "External id": 977605,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937235889.346, "dur": 14.946, + "args": { + "External id": 977606,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937235913.943, "dur": 38.010, + "args": { + "External id": 977607,"Record function id": 0, "Sequence number": 10552443, "Fwd thread id": 1, "Ev Idx": 198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937235915.634, "dur": 1.565, + "args": { + "External id": 977608,"Sequence number": 10552443, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 199 + } + }, + { + "ph": "f", "id": 23, "pid": 2338708, "tid": 2379421, "ts": 6345937235915.634, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937235919.753, "dur": 27.018, + "args": { + "External id": 977609,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937235921.907, "dur": 24.320, + "args": { + "External id": 977610,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937235927.996, "dur": 0.789, + "args": { + "External id": 977611,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937235957.629, "dur": 3463.957, + "args": { + "External id": 977612,"Record function id": 0, "Sequence number": 10552442, "Fwd thread id": 1, "Ev Idx": 203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937235970.375, "dur": 3415.435, + "args": { + "External id": 977613,"Sequence number": 10552442, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 204 + } + }, + { + "ph": "f", "id": 24, "pid": 2338708, "tid": 2379421, "ts": 6345937235970.375, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937236001.214, "dur": 3.444, + "args": { + "External id": 977614,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937236007.269, "dur": 3149.520, + "args": { + "External id": 977615,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937236028.151, "dur": 3128.279, + "args": { + "External id": 977616,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937236033.537, "dur": 4.957, + "args": { + "External id": 977617,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937236039.694, "dur": 3115.089, + "args": { + "External id": 977618,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338708, "tid": 2379421, + "ts": 6345937239161.640, "dur": 0.412, + "args": { + "External id": 977619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239165.965, "dur": 2.970, + "args": { + "External id": 977620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239167.787, "dur": 0.978, + "args": { + "External id": 977621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2379421, + "ts": 6345937239174.276, "dur": 23.976, + "args": { + "External id": 977622,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338708, "tid": 2379421, + "ts": 6345937239203.125, "dur": 45.924, + "args": { + "External id": 977623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2379421, + "ts": 6345937239204.683, "dur": 44.127, + "args": { + "External id": 977624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2379421, + "ts": 6345937239206.314, "dur": 42.136, + "args": { + "External id": 977625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239398.730, "dur": 17.242, + "args": { + "External id": 977626,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937239441.611, "dur": 14.804, + "args": { + "External id": 977627,"Record function id": 0, "Ev Idx": 218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937239444.201, "dur": 10.286, + "args": { + "External id": 977628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937239447.794, "dur": 5.477, + "args": { + "External id": 977629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937239449.057, "dur": 4.064, + "args": { + "External id": 977630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937239460.779, "dur": 18.359, + "args": { + "External id": 977631,"Record function id": 0, "Sequence number": 10552441, "Fwd thread id": 1, "Ev Idx": 222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937239464.412, "dur": 11.696, + "args": { + "External id": 977632,"Sequence number": 10552441, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 223 + } + }, + { + "ph": "f", "id": 25, "pid": 2338708, "tid": 2379421, "ts": 6345937239464.412, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937239468.208, "dur": 7.613, + "args": { + "External id": 977633,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937239471.991, "dur": 3.642, + "args": { + "External id": 977634,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937239483.045, "dur": 78.521, + "args": { + "External id": 977635,"Record function id": 0, "Sequence number": 10552440, "Fwd thread id": 1, "Ev Idx": 226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937239483.968, "dur": 71.591, + "args": { + "External id": 977636,"Sequence number": 10552440, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 227 + } + }, + { + "ph": "f", "id": 26, "pid": 2338708, "tid": 2379421, "ts": 6345937239483.968, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937239485.970, "dur": 69.162, + "args": { + "External id": 977637,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937239489.339, "dur": 24.938, + "args": { + "External id": 977638,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937239491.360, "dur": 3.803, + "args": { + "External id": 977639,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239496.282, "dur": 17.672, + "args": { + "External id": 977640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239498.298, "dur": 15.049, + "args": { + "External id": 977641,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937239516.242, "dur": 5.985, + "args": { + "External id": 977642,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937239520.608, "dur": 1.324, + "args": { + "External id": 977643,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239525.528, "dur": 28.741, + "args": { + "External id": 977644,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937239566.532, "dur": 60.593, + "args": { + "External id": 977645,"Record function id": 0, "Sequence number": 10552439, "Fwd thread id": 1, "Ev Idx": 236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937239567.555, "dur": 55.789, + "args": { + "External id": 977646,"Sequence number": 10552439, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 237 + } + }, + { + "ph": "f", "id": 27, "pid": 2338708, "tid": 2379421, "ts": 6345937239567.555, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937239569.771, "dur": 53.213, + "args": { + "External id": 977647,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937239571.773, "dur": 23.698, + "args": { + "External id": 977648,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937239577.703, "dur": 3.924, + "args": { + "External id": 977649,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239582.390, "dur": 12.702, + "args": { + "External id": 977650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239583.336, "dur": 11.208, + "args": { + "External id": 977651,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345937239596.872, "dur": 7.945, + "args": { + "External id": 977652,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937239603.041, "dur": 1.167, + "args": { + "External id": 977653,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239606.057, "dur": 16.367, + "args": { + "External id": 977654,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937239631.833, "dur": 129.176, + "args": { + "External id": 977655,"Record function id": 0, "Sequence number": 10552438, "Fwd thread id": 1, "Ev Idx": 246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937239633.113, "dur": 124.111, + "args": { + "External id": 977656,"Sequence number": 10552438, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 247 + } + }, + { + "ph": "f", "id": 28, "pid": 2338708, "tid": 2379421, "ts": 6345937239633.113, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937239634.729, "dur": 121.961, + "args": { + "External id": 977657,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937239637.964, "dur": 17.441, + "args": { + "External id": 977658,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937239638.831, "dur": 2.926, + "args": { + "External id": 977659,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239642.527, "dur": 12.565, + "args": { + "External id": 977660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239643.411, "dur": 11.280, + "args": { + "External id": 977661,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937239656.511, "dur": 4.940, + "args": { + "External id": 977662,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937239658.064, "dur": 3.180, + "args": { + "External id": 977663,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239665.117, "dur": 90.217, + "args": { + "External id": 977664,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937239766.429, "dur": 120.568, + "args": { + "External id": 977665,"Record function id": 0, "Sequence number": 10552437, "Fwd thread id": 1, "Ev Idx": 256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937239767.447, "dur": 98.404, + "args": { + "External id": 977666,"Sequence number": 10552437, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 257 + } + }, + { + "ph": "f", "id": 29, "pid": 2338708, "tid": 2379421, "ts": 6345937239767.447, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937239769.148, "dur": 96.194, + "args": { + "External id": 977667,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345937239770.211, "dur": 30.342, + "args": { + "External id": 977668,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937239776.676, "dur": 2.864, + "args": { + "External id": 977669,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239785.355, "dur": 14.886, + "args": { + "External id": 977670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239788.477, "dur": 11.304, + "args": { + "External id": 977671,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937239804.352, "dur": 2.539, + "args": { + "External id": 977672,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937239805.804, "dur": 0.931, + "args": { + "External id": 977673,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239807.638, "dur": 56.914, + "args": { + "External id": 977674,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937239870.953, "dur": 13.585, + "args": { + "External id": 977675,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937239893.523, "dur": 429.612, + "args": { + "External id": 977676,"Record function id": 0, "Sequence number": 10552436, "Fwd thread id": 1, "Ev Idx": 267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937239894.925, "dur": 417.228, + "args": { + "External id": 977677,"Sequence number": 10552436, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 268 + } + }, + { + "ph": "f", "id": 30, "pid": 2338708, "tid": 2379421, "ts": 6345937239894.925, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937240128.016, "dur": 49.984, + "args": { + "External id": 977678,"kernel_hash": "csesqrbnxb6gkjrwgoohyamgdaghjz2d2andcfwzecbkqzeczzqz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "131072", "4096", "1", "993", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/csesqrbnxb6gkjrwgoohyamgdaghjz2d2andcfwzecbkqzeczzqz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [4096], [131072, 4096], [131072, 4096], [132, 4096], [131072], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937240219.967, "dur": 26.792, + "args": { + "External id": 977679,"kernel_hash": "cgpnzfm4ww5f67uofcrd54t5w35w6y4yspbhmhqt5ddc6salf5zl", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/gp/cgpnzfm4ww5f67uofcrd54t5w35w6y4yspbhmhqt5ddc6salf5zl.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937240266.726, "dur": 19.185, + "args": { + "External id": 977680,"kernel_hash": "cvj4y67mu47myxc3c6bg7waq6ihcppieaul2mb3dd66obpbk7cmj", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvj4y67mu47myxc3c6bg7waq6ihcppieaul2mb3dd66obpbk7cmj.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937240336.363, "dur": 16.017, + "args": { + "External id": 977681,"Record function id": 0, "Ev Idx": 272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937240338.692, "dur": 12.656, + "args": { + "External id": 977682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937240342.701, "dur": 7.615, + "args": { + "External id": 977683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937240346.701, "dur": 3.500, + "args": { + "External id": 977684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937240357.415, "dur": 42.308, + "args": { + "External id": 977685,"Record function id": 0, "Sequence number": 10552435, "Fwd thread id": 1, "Ev Idx": 276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937240358.770, "dur": 31.094, + "args": { + "External id": 977686,"Sequence number": 10552435, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 277 + } + }, + { + "ph": "f", "id": 31, "pid": 2338708, "tid": 2379421, "ts": 6345937240358.770, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345937240361.341, "dur": 9.855, + "args": { + "External id": 977687,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240367.677, "dur": 1.321, + "args": { + "External id": 977688,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345937240371.841, "dur": 8.255, + "args": { + "External id": 977689,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240378.713, "dur": 0.646, + "args": { + "External id": 977690,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345937240380.780, "dur": 2.202, + "args": { + "External id": 977691,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240381.904, "dur": 0.473, + "args": { + "External id": 977692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345937240383.588, "dur": 5.595, + "args": { + "External id": 977693,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240388.061, "dur": 0.511, + "args": { + "External id": 977694,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937240404.292, "dur": 7.125, + "args": { + "External id": 977695,"Record function id": 0, "Sequence number": 10552434, "Fwd thread id": 1, "Ev Idx": 286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937240405.314, "dur": 1.213, + "args": { + "External id": 977696,"Sequence number": 10552434, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 287 + } + }, + { + "ph": "f", "id": 32, "pid": 2338708, "tid": 2379421, "ts": 6345937240405.314, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937240415.932, "dur": 563.483, + "args": { + "External id": 977697,"Record function id": 0, "Sequence number": 10552433, "Fwd thread id": 1, "Ev Idx": 288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937240417.161, "dur": 547.105, + "args": { + "External id": 977698,"Sequence number": 10552433, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 289 + } + }, + { + "ph": "f", "id": 33, "pid": 2338708, "tid": 2379421, "ts": 6345937240417.161, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937240460.171, "dur": 15.613, + "args": { + "External id": 977699,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937240469.679, "dur": 5.719, + "args": { + "External id": 977700,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937240480.703, "dur": 7.202, + "args": { + "External id": 977701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937240483.631, "dur": 3.205, + "args": { + "External id": 977702,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240485.680, "dur": 0.877, + "args": { + "External id": 977703,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2379421, + "ts": 6345937240494.581, "dur": 135.018, + "args": { + "External id": 977704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937240495.753, "dur": 7.303, + "args": { + "External id": 977705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937240496.429, "dur": 5.882, + "args": { + "External id": 977706,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240499.581, "dur": 2.601, + "args": { + "External id": 977707,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2379421, + "ts": 6345937240504.812, "dur": 123.694, + "args": { + "External id": 977708,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937240506.690, "dur": 120.766, + "args": { + "External id": 977709,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345937240635.949, "dur": 7.821, + "args": { + "External id": 977710,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937240640.527, "dur": 3.086, + "args": { + "External id": 977711,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937240679.940, "dur": 8.135, + "args": { + "External id": 977712,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937240689.371, "dur": 2.381, + "args": { + "External id": 977713,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937240695.273, "dur": 2.125, + "args": { + "External id": 977714,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937240739.316, "dur": 2.700, + "args": { + "External id": 977715,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937240740.150, "dur": 1.657, + "args": { + "External id": 977716,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338708, "tid": 2379421, + "ts": 6345937240774.798, "dur": 163.451, + "args": { + "External id": 977717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345937240782.117, "dur": 10.989, + "args": { + "External id": 977718,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240788.869, "dur": 1.171, + "args": { + "External id": 977719,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937240796.085, "dur": 10.759, + "args": { + "External id": 977720,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240802.801, "dur": 3.047, + "args": { + "External id": 977721,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345937240809.131, "dur": 4.873, + "args": { + "External id": 977722,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240813.085, "dur": 0.521, + "args": { + "External id": 977723,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937240814.632, "dur": 5.408, + "args": { + "External id": 977724,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240818.793, "dur": 0.597, + "args": { + "External id": 977725,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937240825.410, "dur": 4.537, + "args": { + "External id": 977726,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240828.781, "dur": 0.819, + "args": { + "External id": 977727,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937240833.154, "dur": 8.558, + "args": { + "External id": 977728,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937240839.285, "dur": 2.223, + "args": { + "External id": 977729,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937240842.546, "dur": 4.766, + "args": { + "External id": 977730,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240846.471, "dur": 0.443, + "args": { + "External id": 977731,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937240847.921, "dur": 2.270, + "args": { + "External id": 977732,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937240848.848, "dur": 1.198, + "args": { + "External id": 977733,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345937240852.456, "dur": 68.405, + "args": { + "External id": 977734,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937240922.914, "dur": 3.999, + "args": { + "External id": 977735,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937240930.591, "dur": 2.357, + "args": { + "External id": 977736,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937240931.958, "dur": 0.551, + "args": { + "External id": 977737,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937240935.631, "dur": 1.290, + "args": { + "External id": 977738,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937240991.886, "dur": 11.450, + "args": { + "External id": 977739,"Record function id": 0, "Ev Idx": 330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937240994.188, "dur": 8.261, + "args": { + "External id": 977740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937240997.188, "dur": 4.113, + "args": { + "External id": 977741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937240998.362, "dur": 2.801, + "args": { + "External id": 977742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241029.052, "dur": 13.979, + "args": { + "External id": 977743,"Record function id": 0, "Sequence number": 10552432, "Fwd thread id": 1, "Ev Idx": 334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241031.442, "dur": 8.324, + "args": { + "External id": 977744,"Sequence number": 10552432, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 335 + } + }, + { + "ph": "f", "id": 34, "pid": 2338708, "tid": 2379421, "ts": 6345937241031.442, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937241036.372, "dur": 3.145, + "args": { + "External id": 977745,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937241037.362, "dur": 1.861, + "args": { + "External id": 977746,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241049.335, "dur": 209.445, + "args": { + "External id": 977747,"Record function id": 0, "Sequence number": 10552431, "Fwd thread id": 1, "Ev Idx": 338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241050.417, "dur": 197.567, + "args": { + "External id": 977748,"Sequence number": 10552431, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 339 + } + }, + { + "ph": "f", "id": 35, "pid": 2338708, "tid": 2379421, "ts": 6345937241050.417, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937241096.028, "dur": 6.823, + "args": { + "External id": 977749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937241098.231, "dur": 3.742, + "args": { + "External id": 977750,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937241100.158, "dur": 1.384, + "args": { + "External id": 977751,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937241104.177, "dur": 75.645, + "args": { + "External id": 977752,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937241181.176, "dur": 6.726, + "args": { + "External id": 977753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937241182.255, "dur": 4.943, + "args": { + "External id": 977754,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937241185.980, "dur": 1.016, + "args": { + "External id": 977755,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937241190.076, "dur": 5.853, + "args": { + "External id": 977756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937241191.422, "dur": 3.841, + "args": { + "External id": 977757,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937241194.766, "dur": 0.376, + "args": { + "External id": 977758,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937241196.466, "dur": 50.472, + "args": { + "External id": 977759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241267.104, "dur": 10.934, + "args": { + "External id": 977760,"Record function id": 0, "Sequence number": 10552430, "Fwd thread id": 1, "Ev Idx": 351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241268.656, "dur": 7.061, + "args": { + "External id": 977761,"Sequence number": 10552430, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 352 + } + }, + { + "ph": "f", "id": 36, "pid": 2338708, "tid": 2379421, "ts": 6345937241268.656, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937241270.179, "dur": 5.360, + "args": { + "External id": 977762,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937241271.235, "dur": 4.126, + "args": { + "External id": 977763,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241282.535, "dur": 14.724, + "args": { + "External id": 977764,"Record function id": 0, "Sequence number": 10552429, "Fwd thread id": 1, "Ev Idx": 355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241286.319, "dur": 7.930, + "args": { + "External id": 977765,"Sequence number": 10552429, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 356 + } + }, + { + "ph": "f", "id": 37, "pid": 2338708, "tid": 2379421, "ts": 6345937241286.319, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937241287.772, "dur": 6.207, + "args": { + "External id": 977766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937241288.810, "dur": 4.622, + "args": { + "External id": 977767,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937241292.752, "dur": 0.511, + "args": { + "External id": 977768,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937241302.634, "dur": 7.843, + "args": { + "External id": 977769,"Record function id": 0, "Ev Idx": 360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937241304.506, "dur": 5.284, + "args": { + "External id": 977770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937241306.365, "dur": 3.036, + "args": { + "External id": 977771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937241307.467, "dur": 1.797, + "args": { + "External id": 977772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241317.059, "dur": 10.216, + "args": { + "External id": 977773,"Record function id": 0, "Sequence number": 10552428, "Fwd thread id": 1, "Ev Idx": 364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241318.305, "dur": 6.173, + "args": { + "External id": 977774,"Sequence number": 10552428, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 365 + } + }, + { + "ph": "f", "id": 38, "pid": 2338708, "tid": 2379421, "ts": 6345937241318.305, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937241319.409, "dur": 4.874, + "args": { + "External id": 977775,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937241323.049, "dur": 1.059, + "args": { + "External id": 977776,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241331.117, "dur": 141.251, + "args": { + "External id": 977777,"Record function id": 0, "Sequence number": 10552427, "Fwd thread id": 1, "Ev Idx": 368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241332.042, "dur": 130.568, + "args": { + "External id": 977778,"Sequence number": 10552427, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 369 + } + }, + { + "ph": "f", "id": 39, "pid": 2338708, "tid": 2379421, "ts": 6345937241332.042, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937241334.660, "dur": 3.203, + "args": { + "External id": 977779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937241335.584, "dur": 1.773, + "args": { + "External id": 977780,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937241336.517, "dur": 0.709, + "args": { + "External id": 977781,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937241340.602, "dur": 53.355, + "args": { + "External id": 977782,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937241395.255, "dur": 6.954, + "args": { + "External id": 977783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937241396.154, "dur": 5.351, + "args": { + "External id": 977784,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937241400.169, "dur": 1.197, + "args": { + "External id": 977785,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937241403.386, "dur": 5.742, + "args": { + "External id": 977786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937241404.526, "dur": 4.108, + "args": { + "External id": 977787,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937241407.984, "dur": 0.558, + "args": { + "External id": 977788,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937241409.870, "dur": 51.928, + "args": { + "External id": 977789,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241477.895, "dur": 41.610, + "args": { + "External id": 977790,"Record function id": 0, "Sequence number": 10552426, "Fwd thread id": 1, "Ev Idx": 381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241479.151, "dur": 6.482, + "args": { + "External id": 977791,"Sequence number": 10552426, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 382 + } + }, + { + "ph": "f", "id": 40, "pid": 2338708, "tid": 2379421, "ts": 6345937241479.151, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937241480.709, "dur": 4.753, + "args": { + "External id": 977792,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937241483.783, "dur": 1.519, + "args": { + "External id": 977793,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345937241489.959, "dur": 26.395, + "args": { + "External id": 977794,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241523.974, "dur": 11.559, + "args": { + "External id": 977795,"Record function id": 0, "Sequence number": 10552425, "Fwd thread id": 1, "Ev Idx": 386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937241525.318, "dur": 7.465, + "args": { + "External id": 977796,"Sequence number": 10552425, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 387 + } + }, + { + "ph": "f", "id": 41, "pid": 2338708, "tid": 2379421, "ts": 6345937241525.318, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937241529.019, "dur": 3.506, + "args": { + "External id": 977797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937241529.936, "dur": 2.010, + "args": { + "External id": 977798,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937241531.100, "dur": 0.661, + "args": { + "External id": 977799,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937241540.220, "dur": 6.441, + "args": { + "External id": 977800,"Record function id": 0, "Ev Idx": 391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937241541.866, "dur": 4.285, + "args": { + "External id": 977801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937241543.037, "dur": 2.749, + "args": { + "External id": 977802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937241543.970, "dur": 1.707, + "args": { + "External id": 977803,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937241551.250, "dur": 563.803, + "args": { + "External id": 977804,"Record function id": 0, "Sequence number": 10552424, "Fwd thread id": 1, "Ev Idx": 395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937241552.850, "dur": 543.254, + "args": { + "External id": 977805,"Sequence number": 10552424, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 396 + } + }, + { + "ph": "f", "id": 42, "pid": 2338708, "tid": 2379421, "ts": 6345937241552.850, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2379421, + "ts": 6345937241586.097, "dur": 39.422, + "args": { + "External id": 977806,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345937241587.926, "dur": 37.352, + "args": { + "External id": 977807,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937241590.863, "dur": 7.449, + "args": { + "External id": 977808,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937241593.747, "dur": 3.964, + "args": { + "External id": 977809,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937241600.077, "dur": 24.420, + "args": { + "External id": 977810,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937241641.842, "dur": 2.545, + "args": { + "External id": 977811,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937241642.746, "dur": 1.480, + "args": { + "External id": 977812,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937241649.390, "dur": 2.166, + "args": { + "External id": 977813,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937241650.231, "dur": 1.212, + "args": { + "External id": 977814,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937241670.917, "dur": 4.994, + "args": { + "External id": 977815,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937241688.329, "dur": 2.702, + "args": { + "External id": 977816,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937241900.294, "dur": 2.964, + "args": { + "External id": 977817,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937241908.245, "dur": 40.334, + "args": { + "External id": 977818,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937241922.438, "dur": 0.931, + "args": { + "External id": 977819,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937241955.658, "dur": 38.027, + "args": { + "External id": 977820,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937241957.737, "dur": 35.698, + "args": { + "External id": 977821,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937241962.439, "dur": 6.942, + "args": { + "External id": 977822,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937241973.581, "dur": 19.153, + "args": { + "External id": 977823,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345937241998.925, "dur": 3.046, + "args": { + "External id": 977824,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937242000.368, "dur": 1.421, + "args": { + "External id": 977825,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937242032.554, "dur": 6.324, + "args": { + "External id": 977826,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937242036.436, "dur": 2.147, + "args": { + "External id": 977827,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937242041.751, "dur": 2.009, + "args": { + "External id": 977828,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937242042.391, "dur": 1.259, + "args": { + "External id": 977829,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937242132.038, "dur": 11.840, + "args": { + "External id": 977830,"Record function id": 0, "Ev Idx": 421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937242134.720, "dur": 8.184, + "args": { + "External id": 977831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937242137.498, "dur": 4.348, + "args": { + "External id": 977832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937242138.938, "dur": 2.791, + "args": { + "External id": 977833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937242148.148, "dur": 12.379, + "args": { + "External id": 977834,"Record function id": 0, "Sequence number": 10552423, "Fwd thread id": 1, "Ev Idx": 425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937242149.384, "dur": 7.814, + "args": { + "External id": 977835,"Sequence number": 10552423, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 426 + } + }, + { + "ph": "f", "id": 43, "pid": 2338708, "tid": 2379421, "ts": 6345937242149.384, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937242153.919, "dur": 2.986, + "args": { + "External id": 977836,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937242154.652, "dur": 2.128, + "args": { + "External id": 977837,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937242167.706, "dur": 183.406, + "args": { + "External id": 977838,"Record function id": 0, "Sequence number": 10552422, "Fwd thread id": 1, "Ev Idx": 429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937242168.650, "dur": 177.722, + "args": { + "External id": 977839,"Sequence number": 10552422, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 430 + } + }, + { + "ph": "f", "id": 44, "pid": 2338708, "tid": 2379421, "ts": 6345937242168.650, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937242172.726, "dur": 6.146, + "args": { + "External id": 977840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937242174.753, "dur": 3.419, + "args": { + "External id": 977841,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937242176.878, "dur": 1.014, + "args": { + "External id": 977842,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937242180.353, "dur": 91.123, + "args": { + "External id": 977843,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937242272.880, "dur": 12.115, + "args": { + "External id": 977844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937242274.031, "dur": 10.078, + "args": { + "External id": 977845,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937242278.222, "dur": 5.686, + "args": { + "External id": 977846,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937242286.812, "dur": 5.598, + "args": { + "External id": 977847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937242288.271, "dur": 3.687, + "args": { + "External id": 977848,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937242291.526, "dur": 0.296, + "args": { + "External id": 977849,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937242293.194, "dur": 52.364, + "args": { + "External id": 977850,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937242356.992, "dur": 10.701, + "args": { + "External id": 977851,"Record function id": 0, "Sequence number": 10552421, "Fwd thread id": 1, "Ev Idx": 442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937242358.060, "dur": 6.751, + "args": { + "External id": 977852,"Sequence number": 10552421, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 443 + } + }, + { + "ph": "f", "id": 45, "pid": 2338708, "tid": 2379421, "ts": 6345937242358.060, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937242359.706, "dur": 4.933, + "args": { + "External id": 977853,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937242363.011, "dur": 1.485, + "args": { + "External id": 977854,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937242371.824, "dur": 10.676, + "args": { + "External id": 977855,"Record function id": 0, "Sequence number": 10552420, "Fwd thread id": 1, "Ev Idx": 446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937242373.003, "dur": 7.071, + "args": { + "External id": 977856,"Sequence number": 10552420, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 447 + } + }, + { + "ph": "f", "id": 46, "pid": 2338708, "tid": 2379421, "ts": 6345937242373.003, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937242374.165, "dur": 5.654, + "args": { + "External id": 977857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937242374.830, "dur": 4.442, + "args": { + "External id": 977858,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937242378.546, "dur": 0.600, + "args": { + "External id": 977859,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937242387.181, "dur": 6.541, + "args": { + "External id": 977860,"Record function id": 0, "Ev Idx": 451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937242388.764, "dur": 4.276, + "args": { + "External id": 977861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937242390.280, "dur": 2.483, + "args": { + "External id": 977862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937242391.315, "dur": 1.338, + "args": { + "External id": 977863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937242397.833, "dur": 9.879, + "args": { + "External id": 977864,"Record function id": 0, "Sequence number": 10552419, "Fwd thread id": 1, "Ev Idx": 455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937242401.614, "dur": 3.826, + "args": { + "External id": 977865,"Sequence number": 10552419, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 456 + } + }, + { + "ph": "f", "id": 47, "pid": 2338708, "tid": 2379421, "ts": 6345937242401.614, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937242402.671, "dur": 2.583, + "args": { + "External id": 977866,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937242403.717, "dur": 1.369, + "args": { + "External id": 977867,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937242415.525, "dur": 435.696, + "args": { + "External id": 977868,"Record function id": 0, "Sequence number": 10552418, "Fwd thread id": 1, "Ev Idx": 459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937242416.892, "dur": 413.893, + "args": { + "External id": 977869,"Sequence number": 10552418, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 460 + } + }, + { + "ph": "f", "id": 48, "pid": 2338708, "tid": 2379421, "ts": 6345937242416.892, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937242435.218, "dur": 14.328, + "args": { + "External id": 977870,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937242443.633, "dur": 5.226, + "args": { + "External id": 977871,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937242451.865, "dur": 6.643, + "args": { + "External id": 977872,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937242453.018, "dur": 5.280, + "args": { + "External id": 977873,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937242460.104, "dur": 6.030, + "args": { + "External id": 977874,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937242463.634, "dur": 2.287, + "args": { + "External id": 977875,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937242501.215, "dur": 300.292, + "args": { + "External id": 977876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937242594.633, "dur": 3.878, + "args": { + "External id": 977877,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937242602.631, "dur": 5.102, + "args": { + "External id": 977878,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937242611.056, "dur": 1.990, + "args": { + "External id": 977879,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937242614.002, "dur": 2.099, + "args": { + "External id": 977880,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937242681.328, "dur": 2.823, + "args": { + "External id": 977881,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937242682.287, "dur": 1.686, + "args": { + "External id": 977882,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937242685.932, "dur": 33.842, + "args": { + "External id": 977883,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937242691.829, "dur": 3.111, + "args": { + "External id": 977884,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937242721.454, "dur": 1.726, + "args": { + "External id": 977885,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937242722.278, "dur": 0.790, + "args": { + "External id": 977886,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937242728.427, "dur": 22.107, + "args": { + "External id": 977887,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937242732.343, "dur": 2.964, + "args": { + "External id": 977888,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937242815.945, "dur": 4.437, + "args": { + "External id": 977889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937242823.704, "dur": 0.883, + "args": { + "External id": 977890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937242827.161, "dur": 0.661, + "args": { + "External id": 977891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937242860.391, "dur": 340.779, + "args": { + "External id": 977892,"Record function id": 0, "Sequence number": 10552417, "Fwd thread id": 1, "Ev Idx": 483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937242862.135, "dur": 327.664, + "args": { + "External id": 977893,"Sequence number": 10552417, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 484 + } + }, + { + "ph": "f", "id": 49, "pid": 2338708, "tid": 2379421, "ts": 6345937242862.135, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345937242885.867, "dur": 49.960, + "args": { + "External id": 977894,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937242888.965, "dur": 3.777, + "args": { + "External id": 977895,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937242894.638, "dur": 40.508, + "args": { + "External id": 977896,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937242947.192, "dur": 7.411, + "args": { + "External id": 977897,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937242951.252, "dur": 2.979, + "args": { + "External id": 977898,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937243213.820, "dur": 219.847, + "args": { + "External id": 977899,"Record function id": 0, "Sequence number": 10552416, "Fwd thread id": 1, "Ev Idx": 490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937243216.508, "dur": 208.937, + "args": { + "External id": 977900,"Sequence number": 10552416, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 491 + } + }, + { + "ph": "f", "id": 50, "pid": 2338708, "tid": 2379421, "ts": 6345937243216.508, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345937243233.015, "dur": 59.130, + "args": { + "External id": 977901,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937243236.188, "dur": 4.987, + "args": { + "External id": 977902,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937243242.408, "dur": 49.204, + "args": { + "External id": 977903,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937243301.103, "dur": 10.036, + "args": { + "External id": 977904,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937243306.957, "dur": 3.820, + "args": { + "External id": 977905,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243442.282, "dur": 17.680, + "args": { + "External id": 977906,"Record function id": 0, "Sequence number": 10552415, "Fwd thread id": 1, "Ev Idx": 497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243446.789, "dur": 10.317, + "args": { + "External id": 977907,"Sequence number": 10552415, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 498 + } + }, + { + "ph": "f", "id": 51, "pid": 2338708, "tid": 2379421, "ts": 6345937243446.789, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937243449.743, "dur": 7.056, + "args": { + "External id": 977908,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937243451.090, "dur": 5.497, + "args": { + "External id": 977909,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243464.506, "dur": 12.654, + "args": { + "External id": 977910,"Record function id": 0, "Sequence number": 10552414, "Fwd thread id": 1, "Ev Idx": 501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243465.776, "dur": 8.753, + "args": { + "External id": 977911,"Sequence number": 10552414, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 502 + } + }, + { + "ph": "f", "id": 52, "pid": 2338708, "tid": 2379421, "ts": 6345937243465.776, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937243467.158, "dur": 7.204, + "args": { + "External id": 977912,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937243470.323, "dur": 3.811, + "args": { + "External id": 977913,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243480.795, "dur": 8.969, + "args": { + "External id": 977914,"Record function id": 0, "Sequence number": 10552413, "Fwd thread id": 1, "Ev Idx": 505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243481.970, "dur": 5.362, + "args": { + "External id": 977915,"Sequence number": 10552413, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 506 + } + }, + { + "ph": "f", "id": 53, "pid": 2338708, "tid": 2379421, "ts": 6345937243481.970, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937243483.364, "dur": 3.780, + "args": { + "External id": 977916,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937243486.059, "dur": 0.932, + "args": { + "External id": 977917,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243493.709, "dur": 39.408, + "args": { + "External id": 977918,"Record function id": 0, "Sequence number": 10552412, "Fwd thread id": 1, "Ev Idx": 509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243495.201, "dur": 35.256, + "args": { + "External id": 977919,"Sequence number": 10552412, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 510 + } + }, + { + "ph": "f", "id": 54, "pid": 2338708, "tid": 2379421, "ts": 6345937243495.201, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937243498.734, "dur": 31.524, + "args": { + "External id": 977920,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937243529.038, "dur": 1.097, + "args": { + "External id": 977921,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243537.425, "dur": 183.448, + "args": { + "External id": 977922,"Record function id": 0, "Sequence number": 10552411, "Fwd thread id": 1, "Ev Idx": 513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243538.403, "dur": 173.798, + "args": { + "External id": 977923,"Sequence number": 10552411, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 514 + } + }, + { + "ph": "f", "id": 55, "pid": 2338708, "tid": 2379421, "ts": 6345937243538.403, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937243545.077, "dur": 7.264, + "args": { + "External id": 977924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937243547.540, "dur": 4.004, + "args": { + "External id": 977925,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937243549.589, "dur": 1.527, + "args": { + "External id": 977926,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937243554.039, "dur": 85.376, + "args": { + "External id": 977927,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937243641.115, "dur": 8.931, + "args": { + "External id": 977928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937243641.853, "dur": 7.320, + "args": { + "External id": 977929,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937243645.883, "dur": 3.037, + "args": { + "External id": 977930,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937243652.103, "dur": 5.962, + "args": { + "External id": 977931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937243653.216, "dur": 4.336, + "args": { + "External id": 977932,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937243656.731, "dur": 0.727, + "args": { + "External id": 977933,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937243658.736, "dur": 52.310, + "args": { + "External id": 977934,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243726.701, "dur": 11.595, + "args": { + "External id": 977935,"Record function id": 0, "Sequence number": 10552410, "Fwd thread id": 1, "Ev Idx": 526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243727.811, "dur": 8.210, + "args": { + "External id": 977936,"Sequence number": 10552410, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 527 + } + }, + { + "ph": "f", "id": 56, "pid": 2338708, "tid": 2379421, "ts": 6345937243727.811, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937243729.428, "dur": 6.426, + "args": { + "External id": 977937,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937243732.150, "dur": 3.593, + "args": { + "External id": 977938,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243742.310, "dur": 10.683, + "args": { + "External id": 977939,"Record function id": 0, "Sequence number": 10552409, "Fwd thread id": 1, "Ev Idx": 530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243743.457, "dur": 6.551, + "args": { + "External id": 977940,"Sequence number": 10552409, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 531 + } + }, + { + "ph": "f", "id": 57, "pid": 2338708, "tid": 2379421, "ts": 6345937243743.457, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937243744.303, "dur": 5.451, + "args": { + "External id": 977941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937243744.949, "dur": 4.226, + "args": { + "External id": 977942,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937243748.657, "dur": 0.390, + "args": { + "External id": 977943,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937243759.732, "dur": 13.398, + "args": { + "External id": 977944,"Record function id": 0, "Ev Idx": 535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937243761.342, "dur": 10.838, + "args": { + "External id": 977945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937243764.350, "dur": 7.288, + "args": { + "External id": 977946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937243768.396, "dur": 3.101, + "args": { + "External id": 977947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243779.349, "dur": 5.999, + "args": { + "External id": 977948,"Record function id": 0, "Sequence number": 10552408, "Fwd thread id": 1, "Ev Idx": 539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243780.418, "dur": 2.760, + "args": { + "External id": 977949,"Sequence number": 10552408, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 540 + } + }, + { + "ph": "f", "id": 58, "pid": 2338708, "tid": 2379421, "ts": 6345937243780.418, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937243781.373, "dur": 1.618, + "args": { + "External id": 977950,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937243781.811, "dur": 1.014, + "args": { + "External id": 977951,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243789.137, "dur": 108.230, + "args": { + "External id": 977952,"Record function id": 0, "Sequence number": 10552407, "Fwd thread id": 1, "Ev Idx": 543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243790.110, "dur": 98.957, + "args": { + "External id": 977953,"Sequence number": 10552407, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 544 + } + }, + { + "ph": "f", "id": 59, "pid": 2338708, "tid": 2379421, "ts": 6345937243790.110, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937243792.278, "dur": 4.920, + "args": { + "External id": 977954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937243792.937, "dur": 3.748, + "args": { + "External id": 977955,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937243796.030, "dur": 0.521, + "args": { + "External id": 977956,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937243797.840, "dur": 32.355, + "args": { + "External id": 977957,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937243831.472, "dur": 6.385, + "args": { + "External id": 977958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937243832.078, "dur": 5.005, + "args": { + "External id": 977959,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937243835.850, "dur": 1.087, + "args": { + "External id": 977960,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937243839.051, "dur": 4.774, + "args": { + "External id": 977961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937243842.179, "dur": 1.148, + "args": { + "External id": 977962,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937243842.877, "dur": 0.340, + "args": { + "External id": 977963,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937243844.372, "dur": 43.582, + "args": { + "External id": 977964,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243902.896, "dur": 40.567, + "args": { + "External id": 977965,"Record function id": 0, "Sequence number": 10552406, "Fwd thread id": 1, "Ev Idx": 556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243904.051, "dur": 6.554, + "args": { + "External id": 977966,"Sequence number": 10552406, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 557 + } + }, + { + "ph": "f", "id": 60, "pid": 2338708, "tid": 2379421, "ts": 6345937243904.051, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937243905.636, "dur": 4.803, + "args": { + "External id": 977967,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937243909.020, "dur": 1.261, + "args": { + "External id": 977968,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345937243914.030, "dur": 26.652, + "args": { + "External id": 977969,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243948.438, "dur": 10.543, + "args": { + "External id": 977970,"Record function id": 0, "Sequence number": 10552405, "Fwd thread id": 1, "Ev Idx": 561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243952.002, "dur": 4.783, + "args": { + "External id": 977971,"Sequence number": 10552405, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 562 + } + }, + { + "ph": "f", "id": 61, "pid": 2338708, "tid": 2379421, "ts": 6345937243952.002, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937243953.104, "dur": 3.426, + "args": { + "External id": 977972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937243953.896, "dur": 1.973, + "args": { + "External id": 977973,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937243955.238, "dur": 0.483, + "args": { + "External id": 977974,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937243963.640, "dur": 5.511, + "args": { + "External id": 977975,"Record function id": 0, "Ev Idx": 566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937243965.076, "dur": 3.476, + "args": { + "External id": 977976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937243966.456, "dur": 1.783, + "args": { + "External id": 977977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937243966.964, "dur": 1.187, + "args": { + "External id": 977978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243973.292, "dur": 10.928, + "args": { + "External id": 977979,"Record function id": 0, "Sequence number": 10552404, "Fwd thread id": 1, "Ev Idx": 570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243974.385, "dur": 7.866, + "args": { + "External id": 977980,"Sequence number": 10552404, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 571 + } + }, + { + "ph": "f", "id": 62, "pid": 2338708, "tid": 2379421, "ts": 6345937243974.385, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937243978.055, "dur": 4.026, + "args": { + "External id": 977981,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937243980.907, "dur": 1.032, + "args": { + "External id": 977982,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243987.914, "dur": 198.764, + "args": { + "External id": 977983,"Record function id": 0, "Sequence number": 10552403, "Fwd thread id": 1, "Ev Idx": 574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937243988.781, "dur": 186.841, + "args": { + "External id": 977984,"Sequence number": 10552403, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 575 + } + }, + { + "ph": "f", "id": 63, "pid": 2338708, "tid": 2379421, "ts": 6345937243988.781, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937243990.695, "dur": 2.404, + "args": { + "External id": 977985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937243991.137, "dur": 1.466, + "args": { + "External id": 977986,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937243992.028, "dur": 0.452, + "args": { + "External id": 977987,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937243999.719, "dur": 108.584, + "args": { + "External id": 977988,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937244111.467, "dur": 9.327, + "args": { + "External id": 977989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937244112.512, "dur": 7.301, + "args": { + "External id": 977990,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937244114.038, "dur": 5.595, + "args": { + "External id": 977991,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937244122.448, "dur": 5.042, + "args": { + "External id": 977992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937244123.458, "dur": 3.486, + "args": { + "External id": 977993,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937244126.383, "dur": 0.410, + "args": { + "External id": 977994,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937244129.964, "dur": 44.752, + "args": { + "External id": 977995,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937244195.311, "dur": 31.767, + "args": { + "External id": 977996,"Record function id": 0, "Sequence number": 10552402, "Fwd thread id": 1, "Ev Idx": 587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937244196.521, "dur": 3.987, + "args": { + "External id": 977997,"Sequence number": 10552402, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 588 + } + }, + { + "ph": "f", "id": 64, "pid": 2338708, "tid": 2379421, "ts": 6345937244196.521, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937244197.813, "dur": 2.524, + "args": { + "External id": 977998,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937244198.458, "dur": 1.713, + "args": { + "External id": 977999,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937244203.757, "dur": 20.843, + "args": { + "External id": 978000,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937244231.878, "dur": 13.782, + "args": { + "External id": 978001,"Record function id": 0, "Sequence number": 10552401, "Fwd thread id": 1, "Ev Idx": 592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937244233.108, "dur": 9.837, + "args": { + "External id": 978002,"Sequence number": 10552401, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 593 + } + }, + { + "ph": "f", "id": 65, "pid": 2338708, "tid": 2379421, "ts": 6345937244233.108, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937244234.118, "dur": 8.584, + "args": { + "External id": 978003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937244235.084, "dur": 6.927, + "args": { + "External id": 978004,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937244241.335, "dur": 0.543, + "args": { + "External id": 978005,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937244250.586, "dur": 6.321, + "args": { + "External id": 978006,"Record function id": 0, "Ev Idx": 597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937244252.068, "dur": 4.169, + "args": { + "External id": 978007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937244253.499, "dur": 2.352, + "args": { + "External id": 978008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937244254.070, "dur": 1.649, + "args": { + "External id": 978009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937244261.907, "dur": 430.569, + "args": { + "External id": 978010,"Record function id": 0, "Sequence number": 10552400, "Fwd thread id": 1, "Ev Idx": 601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937244263.512, "dur": 389.491, + "args": { + "External id": 978011,"Sequence number": 10552400, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 602 + } + }, + { + "ph": "f", "id": 66, "pid": 2338708, "tid": 2379421, "ts": 6345937244263.512, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937244304.658, "dur": 2.474, + "args": { + "External id": 978012,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937244305.463, "dur": 1.478, + "args": { + "External id": 978013,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937244327.154, "dur": 4.591, + "args": { + "External id": 978014,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937244342.704, "dur": 4.418, + "args": { + "External id": 978015,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937244533.419, "dur": 2.110, + "args": { + "External id": 978016,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937244539.932, "dur": 41.711, + "args": { + "External id": 978017,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937244551.231, "dur": 1.055, + "args": { + "External id": 978018,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937244587.907, "dur": 38.519, + "args": { + "External id": 978019,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937244590.049, "dur": 36.142, + "args": { + "External id": 978020,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937244596.835, "dur": 6.600, + "args": { + "External id": 978021,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937244605.484, "dur": 19.907, + "args": { + "External id": 978022,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345937244633.515, "dur": 2.642, + "args": { + "External id": 978023,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937244634.648, "dur": 1.375, + "args": { + "External id": 978024,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937244643.083, "dur": 2.086, + "args": { + "External id": 978025,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937244643.903, "dur": 1.101, + "args": { + "External id": 978026,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345937244664.126, "dur": 20.527, + "args": { + "External id": 978027,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937244703.551, "dur": 10.836, + "args": { + "External id": 978028,"Record function id": 0, "Ev Idx": 619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937244707.936, "dur": 5.694, + "args": { + "External id": 978029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937244709.740, "dur": 2.692, + "args": { + "External id": 978030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937244710.497, "dur": 1.814, + "args": { + "External id": 978031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937244721.559, "dur": 5.706, + "args": { + "External id": 978032,"Record function id": 0, "Sequence number": 10552399, "Fwd thread id": 1, "Ev Idx": 623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937244722.853, "dur": 1.393, + "args": { + "External id": 978033,"Sequence number": 10552399, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 624 + } + }, + { + "ph": "f", "id": 67, "pid": 2338708, "tid": 2379421, "ts": 6345937244722.853, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937244731.647, "dur": 538.288, + "args": { + "External id": 978034,"Record function id": 0, "Sequence number": 10552398, "Fwd thread id": 1, "Ev Idx": 625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937244733.050, "dur": 522.025, + "args": { + "External id": 978035,"Sequence number": 10552398, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 626 + } + }, + { + "ph": "f", "id": 68, "pid": 2338708, "tid": 2379421, "ts": 6345937244733.050, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937244768.298, "dur": 8.896, + "args": { + "External id": 978036,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937244773.580, "dur": 3.323, + "args": { + "External id": 978037,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937244780.897, "dur": 6.930, + "args": { + "External id": 978038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937244782.223, "dur": 5.002, + "args": { + "External id": 978039,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937244786.205, "dur": 0.856, + "args": { + "External id": 978040,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2379421, + "ts": 6345937244794.384, "dur": 91.713, + "args": { + "External id": 978041,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937244795.339, "dur": 6.019, + "args": { + "External id": 978042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937244795.992, "dur": 4.863, + "args": { + "External id": 978043,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937244796.812, "dur": 3.895, + "args": { + "External id": 978044,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2379421, + "ts": 6345937244802.606, "dur": 82.915, + "args": { + "External id": 978045,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937244804.011, "dur": 80.386, + "args": { + "External id": 978046,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345937244890.223, "dur": 5.619, + "args": { + "External id": 978047,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937244893.913, "dur": 1.775, + "args": { + "External id": 978048,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937244930.504, "dur": 5.647, + "args": { + "External id": 978049,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937244939.640, "dur": 1.591, + "args": { + "External id": 978050,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937244941.993, "dur": 1.653, + "args": { + "External id": 978051,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937244978.747, "dur": 2.308, + "args": { + "External id": 978052,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937244979.371, "dur": 1.492, + "args": { + "External id": 978053,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338708, "tid": 2379421, + "ts": 6345937245007.312, "dur": 223.066, + "args": { + "External id": 978054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345937245036.518, "dur": 10.701, + "args": { + "External id": 978055,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245042.635, "dur": 3.283, + "args": { + "External id": 978056,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937245048.853, "dur": 50.639, + "args": { + "External id": 978057,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245094.513, "dur": 3.317, + "args": { + "External id": 978058,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345937245101.534, "dur": 2.225, + "args": { + "External id": 978059,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245102.817, "dur": 0.561, + "args": { + "External id": 978060,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937245106.805, "dur": 2.138, + "args": { + "External id": 978061,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245107.743, "dur": 0.562, + "args": { + "External id": 978062,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937245115.213, "dur": 2.228, + "args": { + "External id": 978063,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245116.600, "dur": 0.493, + "args": { + "External id": 978064,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937245121.347, "dur": 8.601, + "args": { + "External id": 978065,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937245127.167, "dur": 2.615, + "args": { + "External id": 978066,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937245130.767, "dur": 3.852, + "args": { + "External id": 978067,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245133.884, "dur": 0.349, + "args": { + "External id": 978068,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937245135.114, "dur": 3.700, + "args": { + "External id": 978069,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937245135.607, "dur": 3.102, + "args": { + "External id": 978070,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345937245140.174, "dur": 71.934, + "args": { + "External id": 978071,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937245216.708, "dur": 3.778, + "args": { + "External id": 978072,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937245221.220, "dur": 4.731, + "args": { + "External id": 978073,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245224.807, "dur": 0.640, + "args": { + "External id": 978074,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937245228.106, "dur": 1.089, + "args": { + "External id": 978075,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937245284.231, "dur": 10.372, + "args": { + "External id": 978076,"Record function id": 0, "Ev Idx": 667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937245286.750, "dur": 7.031, + "args": { + "External id": 978077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937245289.308, "dur": 3.621, + "args": { + "External id": 978078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937245290.385, "dur": 2.383, + "args": { + "External id": 978079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245299.241, "dur": 8.575, + "args": { + "External id": 978080,"Record function id": 0, "Sequence number": 10552397, "Fwd thread id": 1, "Ev Idx": 671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245300.308, "dur": 3.891, + "args": { + "External id": 978081,"Sequence number": 10552397, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 672 + } + }, + { + "ph": "f", "id": 69, "pid": 2338708, "tid": 2379421, "ts": 6345937245300.308, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937245301.946, "dur": 1.905, + "args": { + "External id": 978082,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937245302.667, "dur": 1.028, + "args": { + "External id": 978083,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245312.130, "dur": 131.099, + "args": { + "External id": 978084,"Record function id": 0, "Sequence number": 10552396, "Fwd thread id": 1, "Ev Idx": 675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245315.460, "dur": 119.528, + "args": { + "External id": 978085,"Sequence number": 10552396, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 676 + } + }, + { + "ph": "f", "id": 70, "pid": 2338708, "tid": 2379421, "ts": 6345937245315.460, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937245320.347, "dur": 3.965, + "args": { + "External id": 978086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937245321.695, "dur": 2.008, + "args": { + "External id": 978087,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245322.783, "dur": 0.690, + "args": { + "External id": 978088,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937245325.406, "dur": 47.448, + "args": { + "External id": 978089,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937245373.983, "dur": 8.754, + "args": { + "External id": 978090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937245374.673, "dur": 7.289, + "args": { + "External id": 978091,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245380.708, "dur": 1.060, + "args": { + "External id": 978092,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937245384.204, "dur": 6.139, + "args": { + "External id": 978093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937245388.317, "dur": 1.531, + "args": { + "External id": 978094,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245389.248, "dur": 0.507, + "args": { + "External id": 978095,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937245391.057, "dur": 43.062, + "args": { + "External id": 978096,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245449.023, "dur": 11.649, + "args": { + "External id": 978097,"Record function id": 0, "Sequence number": 10552395, "Fwd thread id": 1, "Ev Idx": 688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245450.108, "dur": 8.894, + "args": { + "External id": 978098,"Sequence number": 10552395, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 689 + } + }, + { + "ph": "f", "id": 71, "pid": 2338708, "tid": 2379421, "ts": 6345937245450.108, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937245453.957, "dur": 4.835, + "args": { + "External id": 978099,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937245457.166, "dur": 1.482, + "args": { + "External id": 978100,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245464.824, "dur": 7.323, + "args": { + "External id": 978101,"Record function id": 0, "Sequence number": 10552394, "Fwd thread id": 1, "Ev Idx": 692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245466.048, "dur": 4.102, + "args": { + "External id": 978102,"Sequence number": 10552394, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 693 + } + }, + { + "ph": "f", "id": 72, "pid": 2338708, "tid": 2379421, "ts": 6345937245466.048, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937245467.251, "dur": 2.622, + "args": { + "External id": 978103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937245467.933, "dur": 1.425, + "args": { + "External id": 978104,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245468.723, "dur": 0.488, + "args": { + "External id": 978105,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937245476.913, "dur": 7.759, + "args": { + "External id": 978106,"Record function id": 0, "Ev Idx": 697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937245478.241, "dur": 5.883, + "args": { + "External id": 978107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937245479.418, "dur": 4.450, + "args": { + "External id": 978108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937245482.589, "dur": 1.132, + "args": { + "External id": 978109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245488.475, "dur": 8.857, + "args": { + "External id": 978110,"Record function id": 0, "Sequence number": 10552393, "Fwd thread id": 1, "Ev Idx": 701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245489.303, "dur": 5.390, + "args": { + "External id": 978111,"Sequence number": 10552393, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 702 + } + }, + { + "ph": "f", "id": 73, "pid": 2338708, "tid": 2379421, "ts": 6345937245489.303, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937245490.604, "dur": 3.868, + "args": { + "External id": 978112,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937245493.527, "dur": 0.777, + "args": { + "External id": 978113,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245501.087, "dur": 110.583, + "args": { + "External id": 978114,"Record function id": 0, "Sequence number": 10552392, "Fwd thread id": 1, "Ev Idx": 705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245502.131, "dur": 100.863, + "args": { + "External id": 978115,"Sequence number": 10552392, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 706 + } + }, + { + "ph": "f", "id": 74, "pid": 2338708, "tid": 2379421, "ts": 6345937245502.131, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937245504.409, "dur": 5.289, + "args": { + "External id": 978116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937245504.937, "dur": 4.266, + "args": { + "External id": 978117,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245508.383, "dur": 0.692, + "args": { + "External id": 978118,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937245510.360, "dur": 38.124, + "args": { + "External id": 978119,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937245549.769, "dur": 3.351, + "args": { + "External id": 978120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937245550.345, "dur": 2.174, + "args": { + "External id": 978121,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245551.470, "dur": 0.923, + "args": { + "External id": 978122,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937245554.417, "dur": 10.598, + "args": { + "External id": 978123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937245557.993, "dur": 6.516, + "args": { + "External id": 978124,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245561.769, "dur": 2.642, + "args": { + "External id": 978125,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937245565.670, "dur": 36.401, + "args": { + "External id": 978126,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245616.893, "dur": 34.820, + "args": { + "External id": 978127,"Record function id": 0, "Sequence number": 10552391, "Fwd thread id": 1, "Ev Idx": 718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245618.101, "dur": 4.027, + "args": { + "External id": 978128,"Sequence number": 10552391, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 719 + } + }, + { + "ph": "f", "id": 75, "pid": 2338708, "tid": 2379421, "ts": 6345937245618.101, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937245619.462, "dur": 2.474, + "args": { + "External id": 978129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937245620.331, "dur": 1.438, + "args": { + "External id": 978130,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345937245625.251, "dur": 23.201, + "args": { + "External id": 978131,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245656.223, "dur": 10.999, + "args": { + "External id": 978132,"Record function id": 0, "Sequence number": 10552390, "Fwd thread id": 1, "Ev Idx": 723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937245657.148, "dur": 7.706, + "args": { + "External id": 978133,"Sequence number": 10552390, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 724 + } + }, + { + "ph": "f", "id": 76, "pid": 2338708, "tid": 2379421, "ts": 6345937245657.148, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937245660.773, "dur": 3.794, + "args": { + "External id": 978134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937245661.708, "dur": 2.240, + "args": { + "External id": 978135,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937245662.818, "dur": 0.945, + "args": { + "External id": 978136,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937245673.699, "dur": 6.021, + "args": { + "External id": 978137,"Record function id": 0, "Ev Idx": 728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937245675.219, "dur": 3.890, + "args": { + "External id": 978138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937245676.477, "dur": 2.301, + "args": { + "External id": 978139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937245677.509, "dur": 1.143, + "args": { + "External id": 978140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937245686.819, "dur": 548.698, + "args": { + "External id": 978141,"Record function id": 0, "Sequence number": 10552389, "Fwd thread id": 1, "Ev Idx": 732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937245688.240, "dur": 507.196, + "args": { + "External id": 978142,"Sequence number": 10552389, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 733 + } + }, + { + "ph": "f", "id": 77, "pid": 2338708, "tid": 2379421, "ts": 6345937245688.240, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2379421, + "ts": 6345937245713.513, "dur": 38.146, + "args": { + "External id": 978143,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345937245715.381, "dur": 36.040, + "args": { + "External id": 978144,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937245718.116, "dur": 6.420, + "args": { + "External id": 978145,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937245720.906, "dur": 3.030, + "args": { + "External id": 978146,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937245726.222, "dur": 24.627, + "args": { + "External id": 978147,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937245766.109, "dur": 4.863, + "args": { + "External id": 978148,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937245769.333, "dur": 1.456, + "args": { + "External id": 978149,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937245776.618, "dur": 3.658, + "args": { + "External id": 978150,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937245777.211, "dur": 2.927, + "args": { + "External id": 978151,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937245797.072, "dur": 2.700, + "args": { + "External id": 978152,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937245812.178, "dur": 2.450, + "args": { + "External id": 978153,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937245991.234, "dur": 2.057, + "args": { + "External id": 978154,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937245997.697, "dur": 104.118, + "args": { + "External id": 978155,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246034.238, "dur": 1.575, + "args": { + "External id": 978156,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937246110.802, "dur": 40.203, + "args": { + "External id": 978157,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937246112.829, "dur": 37.924, + "args": { + "External id": 978158,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246117.984, "dur": 7.279, + "args": { + "External id": 978159,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937246129.508, "dur": 20.569, + "args": { + "External id": 978160,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345937246156.386, "dur": 5.188, + "args": { + "External id": 978161,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937246159.569, "dur": 1.862, + "args": { + "External id": 978162,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937246169.813, "dur": 2.251, + "args": { + "External id": 978163,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937246170.754, "dur": 1.194, + "args": { + "External id": 978164,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937246174.585, "dur": 7.311, + "args": { + "External id": 978165,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937246177.640, "dur": 4.141, + "args": { + "External id": 978166,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937246213.434, "dur": 20.339, + "args": { + "External id": 978167,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937246252.047, "dur": 10.446, + "args": { + "External id": 978168,"Record function id": 0, "Ev Idx": 759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937246254.682, "dur": 6.900, + "args": { + "External id": 978169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937246257.031, "dur": 3.473, + "args": { + "External id": 978170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937246258.278, "dur": 2.103, + "args": { + "External id": 978171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937246266.818, "dur": 11.180, + "args": { + "External id": 978172,"Record function id": 0, "Sequence number": 10552388, "Fwd thread id": 1, "Ev Idx": 763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937246267.984, "dur": 7.131, + "args": { + "External id": 978173,"Sequence number": 10552388, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 764 + } + }, + { + "ph": "f", "id": 78, "pid": 2338708, "tid": 2379421, "ts": 6345937246267.984, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937246269.923, "dur": 4.888, + "args": { + "External id": 978174,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937246273.039, "dur": 1.599, + "args": { + "External id": 978175,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937246282.013, "dur": 154.637, + "args": { + "External id": 978176,"Record function id": 0, "Sequence number": 10552387, "Fwd thread id": 1, "Ev Idx": 767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937246283.367, "dur": 145.484, + "args": { + "External id": 978177,"Sequence number": 10552387, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 768 + } + }, + { + "ph": "f", "id": 79, "pid": 2338708, "tid": 2379421, "ts": 6345937246283.367, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937246287.147, "dur": 5.384, + "args": { + "External id": 978178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937246289.013, "dur": 2.887, + "args": { + "External id": 978179,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246290.380, "dur": 1.099, + "args": { + "External id": 978180,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937246293.809, "dur": 77.429, + "args": { + "External id": 978181,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937246372.951, "dur": 9.507, + "args": { + "External id": 978182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937246376.284, "dur": 5.509, + "args": { + "External id": 978183,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246380.298, "dur": 1.360, + "args": { + "External id": 978184,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937246384.042, "dur": 3.061, + "args": { + "External id": 978185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937246384.973, "dur": 1.455, + "args": { + "External id": 978186,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246385.858, "dur": 0.487, + "args": { + "External id": 978187,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937246387.670, "dur": 40.220, + "args": { + "External id": 978188,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937246442.357, "dur": 15.501, + "args": { + "External id": 978189,"Record function id": 0, "Sequence number": 10552386, "Fwd thread id": 1, "Ev Idx": 780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937246443.510, "dur": 11.722, + "args": { + "External id": 978190,"Sequence number": 10552386, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 781 + } + }, + { + "ph": "f", "id": 80, "pid": 2338708, "tid": 2379421, "ts": 6345937246443.510, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937246445.090, "dur": 9.955, + "args": { + "External id": 978191,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937246451.355, "dur": 3.576, + "args": { + "External id": 978192,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937246464.317, "dur": 7.639, + "args": { + "External id": 978193,"Record function id": 0, "Sequence number": 10552385, "Fwd thread id": 1, "Ev Idx": 784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937246465.528, "dur": 4.586, + "args": { + "External id": 978194,"Sequence number": 10552385, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 785 + } + }, + { + "ph": "f", "id": 81, "pid": 2338708, "tid": 2379421, "ts": 6345937246465.528, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937246466.542, "dur": 3.289, + "args": { + "External id": 978195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937246467.424, "dur": 1.837, + "args": { + "External id": 978196,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246468.412, "dur": 0.693, + "args": { + "External id": 978197,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937246476.846, "dur": 5.592, + "args": { + "External id": 978198,"Record function id": 0, "Ev Idx": 789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937246478.288, "dur": 3.551, + "args": { + "External id": 978199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937246479.642, "dur": 1.923, + "args": { + "External id": 978200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937246480.334, "dur": 1.119, + "args": { + "External id": 978201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937246486.232, "dur": 8.964, + "args": { + "External id": 978202,"Record function id": 0, "Sequence number": 10552384, "Fwd thread id": 1, "Ev Idx": 793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937246489.581, "dur": 3.484, + "args": { + "External id": 978203,"Sequence number": 10552384, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 794 + } + }, + { + "ph": "f", "id": 82, "pid": 2338708, "tid": 2379421, "ts": 6345937246489.581, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937246490.853, "dur": 2.022, + "args": { + "External id": 978204,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937246491.375, "dur": 1.384, + "args": { + "External id": 978205,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937246499.889, "dur": 403.375, + "args": { + "External id": 978206,"Record function id": 0, "Sequence number": 10552383, "Fwd thread id": 1, "Ev Idx": 797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937246504.245, "dur": 378.690, + "args": { + "External id": 978207,"Sequence number": 10552383, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 798 + } + }, + { + "ph": "f", "id": 83, "pid": 2338708, "tid": 2379421, "ts": 6345937246504.245, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937246520.916, "dur": 6.839, + "args": { + "External id": 978208,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246523.275, "dur": 3.969, + "args": { + "External id": 978209,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937246530.117, "dur": 5.516, + "args": { + "External id": 978210,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246533.858, "dur": 1.547, + "args": { + "External id": 978211,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937246537.173, "dur": 3.938, + "args": { + "External id": 978212,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246538.270, "dur": 2.607, + "args": { + "External id": 978213,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937246575.068, "dur": 279.798, + "args": { + "External id": 978214,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937246665.735, "dur": 3.745, + "args": { + "External id": 978215,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937246673.615, "dur": 8.086, + "args": { + "External id": 978216,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937246685.627, "dur": 2.245, + "args": { + "External id": 978217,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937246689.055, "dur": 1.898, + "args": { + "External id": 978218,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937246741.232, "dur": 3.026, + "args": { + "External id": 978219,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937246742.412, "dur": 1.700, + "args": { + "External id": 978220,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937246745.872, "dur": 34.087, + "args": { + "External id": 978221,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246755.798, "dur": 0.914, + "args": { + "External id": 978222,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937246781.096, "dur": 1.309, + "args": { + "External id": 978223,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937246781.712, "dur": 0.607, + "args": { + "External id": 978224,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937246783.376, "dur": 19.067, + "args": { + "External id": 978225,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246785.000, "dur": 2.853, + "args": { + "External id": 978226,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937246868.697, "dur": 3.962, + "args": { + "External id": 978227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937246876.255, "dur": 0.665, + "args": { + "External id": 978228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937246879.306, "dur": 0.729, + "args": { + "External id": 978229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937246911.976, "dur": 317.098, + "args": { + "External id": 978230,"Record function id": 0, "Sequence number": 10552382, "Fwd thread id": 1, "Ev Idx": 821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937246913.494, "dur": 303.860, + "args": { + "External id": 978231,"Sequence number": 10552382, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 822 + } + }, + { + "ph": "f", "id": 84, "pid": 2338708, "tid": 2379421, "ts": 6345937246913.494, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345937246934.446, "dur": 45.884, + "args": { + "External id": 978232,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246939.464, "dur": 3.745, + "args": { + "External id": 978233,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937246944.911, "dur": 34.666, + "args": { + "External id": 978234,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937246991.213, "dur": 5.267, + "args": { + "External id": 978235,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937246993.042, "dur": 3.100, + "args": { + "External id": 978236,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937247241.512, "dur": 216.893, + "args": { + "External id": 978237,"Record function id": 0, "Sequence number": 10552381, "Fwd thread id": 1, "Ev Idx": 828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937247243.822, "dur": 205.632, + "args": { + "External id": 978238,"Sequence number": 10552381, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 829 + } + }, + { + "ph": "f", "id": 85, "pid": 2338708, "tid": 2379421, "ts": 6345937247243.822, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345937247258.826, "dur": 65.865, + "args": { + "External id": 978239,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937247264.525, "dur": 8.334, + "args": { + "External id": 978240,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937247274.083, "dur": 49.937, + "args": { + "External id": 978241,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937247333.927, "dur": 4.556, + "args": { + "External id": 978242,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937247335.332, "dur": 2.785, + "args": { + "External id": 978243,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247465.635, "dur": 16.781, + "args": { + "External id": 978244,"Record function id": 0, "Sequence number": 10552380, "Fwd thread id": 1, "Ev Idx": 835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247467.228, "dur": 11.795, + "args": { + "External id": 978245,"Sequence number": 10552380, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 836 + } + }, + { + "ph": "f", "id": 86, "pid": 2338708, "tid": 2379421, "ts": 6345937247467.228, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937247469.801, "dur": 8.878, + "args": { + "External id": 978246,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937247473.208, "dur": 5.213, + "args": { + "External id": 978247,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247486.924, "dur": 7.078, + "args": { + "External id": 978248,"Record function id": 0, "Sequence number": 10552379, "Fwd thread id": 1, "Ev Idx": 839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247487.916, "dur": 3.947, + "args": { + "External id": 978249,"Sequence number": 10552379, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 840 + } + }, + { + "ph": "f", "id": 87, "pid": 2338708, "tid": 2379421, "ts": 6345937247487.916, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937247489.576, "dur": 2.109, + "args": { + "External id": 978250,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937247490.304, "dur": 1.176, + "args": { + "External id": 978251,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247500.677, "dur": 11.904, + "args": { + "External id": 978252,"Record function id": 0, "Sequence number": 10552378, "Fwd thread id": 1, "Ev Idx": 843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247504.241, "dur": 6.023, + "args": { + "External id": 978253,"Sequence number": 10552378, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 844 + } + }, + { + "ph": "f", "id": 88, "pid": 2338708, "tid": 2379421, "ts": 6345937247504.241, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937247505.777, "dur": 4.304, + "args": { + "External id": 978254,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937247508.795, "dur": 1.180, + "args": { + "External id": 978255,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247516.680, "dur": 6.659, + "args": { + "External id": 978256,"Record function id": 0, "Sequence number": 10552377, "Fwd thread id": 1, "Ev Idx": 847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247517.694, "dur": 3.085, + "args": { + "External id": 978257,"Sequence number": 10552377, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 848 + } + }, + { + "ph": "f", "id": 89, "pid": 2338708, "tid": 2379421, "ts": 6345937247517.694, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937247518.957, "dur": 1.641, + "args": { + "External id": 978258,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937247519.476, "dur": 0.948, + "args": { + "External id": 978259,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247527.199, "dur": 178.571, + "args": { + "External id": 978260,"Record function id": 0, "Sequence number": 10552376, "Fwd thread id": 1, "Ev Idx": 851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247528.128, "dur": 170.268, + "args": { + "External id": 978261,"Sequence number": 10552376, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 852 + } + }, + { + "ph": "f", "id": 90, "pid": 2338708, "tid": 2379421, "ts": 6345937247528.128, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937247535.029, "dur": 6.920, + "args": { + "External id": 978262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937247537.104, "dur": 4.037, + "args": { + "External id": 978263,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937247539.090, "dur": 1.736, + "args": { + "External id": 978264,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937247543.573, "dur": 80.209, + "args": { + "External id": 978265,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937247625.483, "dur": 13.075, + "args": { + "External id": 978266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937247626.516, "dur": 11.099, + "args": { + "External id": 978267,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937247632.888, "dur": 4.491, + "args": { + "External id": 978268,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937247640.674, "dur": 3.469, + "args": { + "External id": 978269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937247641.881, "dur": 1.475, + "args": { + "External id": 978270,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937247642.672, "dur": 0.577, + "args": { + "External id": 978271,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937247644.866, "dur": 52.342, + "args": { + "External id": 978272,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247711.613, "dur": 12.571, + "args": { + "External id": 978273,"Record function id": 0, "Sequence number": 10552375, "Fwd thread id": 1, "Ev Idx": 864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247712.598, "dur": 9.411, + "args": { + "External id": 978274,"Sequence number": 10552375, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 865 + } + }, + { + "ph": "f", "id": 91, "pid": 2338708, "tid": 2379421, "ts": 6345937247712.598, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937247714.584, "dur": 7.219, + "args": { + "External id": 978275,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937247720.265, "dur": 1.385, + "args": { + "External id": 978276,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247728.024, "dur": 9.631, + "args": { + "External id": 978277,"Record function id": 0, "Sequence number": 10552374, "Fwd thread id": 1, "Ev Idx": 868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247729.137, "dur": 4.398, + "args": { + "External id": 978278,"Sequence number": 10552374, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 869 + } + }, + { + "ph": "f", "id": 92, "pid": 2338708, "tid": 2379421, "ts": 6345937247729.137, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937247730.177, "dur": 3.107, + "args": { + "External id": 978279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937247730.855, "dur": 1.856, + "args": { + "External id": 978280,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937247731.802, "dur": 0.791, + "args": { + "External id": 978281,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937247744.483, "dur": 13.534, + "args": { + "External id": 978282,"Record function id": 0, "Ev Idx": 873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937247746.109, "dur": 10.973, + "args": { + "External id": 978283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937247748.914, "dur": 7.656, + "args": { + "External id": 978284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937247753.169, "dur": 3.236, + "args": { + "External id": 978285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247761.809, "dur": 9.192, + "args": { + "External id": 978286,"Record function id": 0, "Sequence number": 10552373, "Fwd thread id": 1, "Ev Idx": 877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247763.080, "dur": 5.329, + "args": { + "External id": 978287,"Sequence number": 10552373, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 878 + } + }, + { + "ph": "f", "id": 93, "pid": 2338708, "tid": 2379421, "ts": 6345937247763.080, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937247764.284, "dur": 3.935, + "args": { + "External id": 978288,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937247767.236, "dur": 0.853, + "args": { + "External id": 978289,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247774.573, "dur": 117.152, + "args": { + "External id": 978290,"Record function id": 0, "Sequence number": 10552372, "Fwd thread id": 1, "Ev Idx": 881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247775.536, "dur": 108.733, + "args": { + "External id": 978291,"Sequence number": 10552372, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 882 + } + }, + { + "ph": "f", "id": 94, "pid": 2338708, "tid": 2379421, "ts": 6345937247775.536, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937247777.842, "dur": 5.265, + "args": { + "External id": 978292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937247778.534, "dur": 4.065, + "args": { + "External id": 978293,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937247781.921, "dur": 0.555, + "args": { + "External id": 978294,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937247783.822, "dur": 34.921, + "args": { + "External id": 978295,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937247820.036, "dur": 11.409, + "args": { + "External id": 978296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937247826.649, "dur": 4.023, + "args": { + "External id": 978297,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937247827.639, "dur": 2.865, + "args": { + "External id": 978298,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937247832.804, "dur": 7.305, + "args": { + "External id": 978299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937247836.016, "dur": 3.556, + "args": { + "External id": 978300,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937247838.991, "dur": 0.465, + "args": { + "External id": 978301,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937247840.849, "dur": 42.455, + "args": { + "External id": 978302,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247897.003, "dur": 39.957, + "args": { + "External id": 978303,"Record function id": 0, "Sequence number": 10552371, "Fwd thread id": 1, "Ev Idx": 894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247898.259, "dur": 3.677, + "args": { + "External id": 978304,"Sequence number": 10552371, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 895 + } + }, + { + "ph": "f", "id": 95, "pid": 2338708, "tid": 2379421, "ts": 6345937247898.259, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937247899.638, "dur": 2.126, + "args": { + "External id": 978305,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937247900.304, "dur": 1.298, + "args": { + "External id": 978306,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345937247908.376, "dur": 25.764, + "args": { + "External id": 978307,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247944.413, "dur": 17.363, + "args": { + "External id": 978308,"Record function id": 0, "Sequence number": 10552370, "Fwd thread id": 1, "Ev Idx": 899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247945.815, "dur": 13.959, + "args": { + "External id": 978309,"Sequence number": 10552370, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 900 + } + }, + { + "ph": "f", "id": 96, "pid": 2338708, "tid": 2379421, "ts": 6345937247945.815, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937247949.186, "dur": 10.317, + "args": { + "External id": 978310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937247950.201, "dur": 8.608, + "args": { + "External id": 978311,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937247957.965, "dur": 0.687, + "args": { + "External id": 978312,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937247966.250, "dur": 7.973, + "args": { + "External id": 978313,"Record function id": 0, "Ev Idx": 904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937247967.553, "dur": 6.099, + "args": { + "External id": 978314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937247968.764, "dur": 4.560, + "args": { + "External id": 978315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937247969.480, "dur": 3.676, + "args": { + "External id": 978316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247980.080, "dur": 8.570, + "args": { + "External id": 978317,"Record function id": 0, "Sequence number": 10552369, "Fwd thread id": 1, "Ev Idx": 908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247981.226, "dur": 5.148, + "args": { + "External id": 978318,"Sequence number": 10552369, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 909 + } + }, + { + "ph": "f", "id": 97, "pid": 2338708, "tid": 2379421, "ts": 6345937247981.226, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937247982.196, "dur": 4.007, + "args": { + "External id": 978319,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937247985.206, "dur": 0.864, + "args": { + "External id": 978320,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247992.550, "dur": 188.847, + "args": { + "External id": 978321,"Record function id": 0, "Sequence number": 10552368, "Fwd thread id": 1, "Ev Idx": 912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937247993.570, "dur": 176.335, + "args": { + "External id": 978322,"Sequence number": 10552368, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 913 + } + }, + { + "ph": "f", "id": 98, "pid": 2338708, "tid": 2379421, "ts": 6345937247993.570, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937247996.216, "dur": 2.746, + "args": { + "External id": 978323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937247996.821, "dur": 1.650, + "args": { + "External id": 978324,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937247997.607, "dur": 0.740, + "args": { + "External id": 978325,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937247999.580, "dur": 105.495, + "args": { + "External id": 978326,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937248110.423, "dur": 7.238, + "args": { + "External id": 978327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937248111.249, "dur": 5.281, + "args": { + "External id": 978328,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937248115.508, "dur": 0.882, + "args": { + "External id": 978329,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937248119.123, "dur": 2.857, + "args": { + "External id": 978330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937248120.174, "dur": 1.283, + "args": { + "External id": 978331,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937248120.903, "dur": 0.439, + "args": { + "External id": 978332,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937248125.177, "dur": 43.926, + "args": { + "External id": 978333,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937248189.648, "dur": 34.862, + "args": { + "External id": 978334,"Record function id": 0, "Sequence number": 10552367, "Fwd thread id": 1, "Ev Idx": 925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937248190.915, "dur": 6.960, + "args": { + "External id": 978335,"Sequence number": 10552367, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 926 + } + }, + { + "ph": "f", "id": 99, "pid": 2338708, "tid": 2379421, "ts": 6345937248190.915, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937248192.616, "dur": 5.075, + "args": { + "External id": 978336,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937248196.050, "dur": 1.484, + "args": { + "External id": 978337,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937248201.034, "dur": 20.796, + "args": { + "External id": 978338,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937248228.909, "dur": 13.628, + "args": { + "External id": 978339,"Record function id": 0, "Sequence number": 10552366, "Fwd thread id": 1, "Ev Idx": 930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937248230.173, "dur": 9.935, + "args": { + "External id": 978340,"Sequence number": 10552366, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 931 + } + }, + { + "ph": "f", "id": 100, "pid": 2338708, "tid": 2379421, "ts": 6345937248230.173, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937248231.219, "dur": 8.645, + "args": { + "External id": 978341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937248231.994, "dur": 7.202, + "args": { + "External id": 978342,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937248235.989, "dur": 3.095, + "args": { + "External id": 978343,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937248247.589, "dur": 6.240, + "args": { + "External id": 978344,"Record function id": 0, "Ev Idx": 935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937248248.986, "dur": 4.226, + "args": { + "External id": 978345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937248250.358, "dur": 2.489, + "args": { + "External id": 978346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937248251.164, "dur": 1.536, + "args": { + "External id": 978347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937248258.764, "dur": 408.493, + "args": { + "External id": 978348,"Record function id": 0, "Sequence number": 10552365, "Fwd thread id": 1, "Ev Idx": 939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937248260.099, "dur": 373.459, + "args": { + "External id": 978349,"Sequence number": 10552365, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 940 + } + }, + { + "ph": "f", "id": 101, "pid": 2338708, "tid": 2379421, "ts": 6345937248260.099, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937248300.177, "dur": 2.375, + "args": { + "External id": 978350,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937248300.957, "dur": 1.358, + "args": { + "External id": 978351,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937248320.064, "dur": 4.682, + "args": { + "External id": 978352,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937248337.097, "dur": 2.847, + "args": { + "External id": 978353,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937248510.819, "dur": 2.145, + "args": { + "External id": 978354,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937248517.434, "dur": 42.418, + "args": { + "External id": 978355,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937248531.736, "dur": 0.914, + "args": { + "External id": 978356,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937248565.850, "dur": 36.111, + "args": { + "External id": 978357,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937248567.623, "dur": 34.039, + "args": { + "External id": 978358,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937248572.130, "dur": 6.771, + "args": { + "External id": 978359,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937248580.622, "dur": 20.292, + "args": { + "External id": 978360,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345937248609.014, "dur": 5.291, + "args": { + "External id": 978361,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937248610.118, "dur": 4.007, + "args": { + "External id": 978362,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937248623.805, "dur": 1.987, + "args": { + "External id": 978363,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937248624.525, "dur": 1.152, + "args": { + "External id": 978364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937248644.003, "dur": 19.183, + "args": { + "External id": 978365,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937248677.772, "dur": 10.161, + "args": { + "External id": 978366,"Record function id": 0, "Ev Idx": 957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937248679.598, "dur": 7.500, + "args": { + "External id": 978367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937248681.354, "dur": 4.714, + "args": { + "External id": 978368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937248684.358, "dur": 1.572, + "args": { + "External id": 978369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937248692.252, "dur": 6.737, + "args": { + "External id": 978370,"Record function id": 0, "Sequence number": 10552364, "Fwd thread id": 1, "Ev Idx": 961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937248693.757, "dur": 1.224, + "args": { + "External id": 978371,"Sequence number": 10552364, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 962 + } + }, + { + "ph": "f", "id": 102, "pid": 2338708, "tid": 2379421, "ts": 6345937248693.757, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937248703.499, "dur": 532.368, + "args": { + "External id": 978372,"Record function id": 0, "Sequence number": 10552363, "Fwd thread id": 1, "Ev Idx": 963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937248704.649, "dur": 516.336, + "args": { + "External id": 978373,"Sequence number": 10552363, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 964 + } + }, + { + "ph": "f", "id": 103, "pid": 2338708, "tid": 2379421, "ts": 6345937248704.649, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937248736.466, "dur": 11.463, + "args": { + "External id": 978374,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937248744.114, "dur": 3.406, + "args": { + "External id": 978375,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937248751.229, "dur": 6.731, + "args": { + "External id": 978376,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937248755.214, "dur": 2.111, + "args": { + "External id": 978377,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937248756.505, "dur": 0.647, + "args": { + "External id": 978378,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2379421, + "ts": 6345937248761.940, "dur": 90.477, + "args": { + "External id": 978379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937248762.857, "dur": 2.731, + "args": { + "External id": 978380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937248763.449, "dur": 1.538, + "args": { + "External id": 978381,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937248764.119, "dur": 0.775, + "args": { + "External id": 978382,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2379421, + "ts": 6345937248769.582, "dur": 82.257, + "args": { + "External id": 978383,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937248771.179, "dur": 79.748, + "args": { + "External id": 978384,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345937248856.808, "dur": 5.682, + "args": { + "External id": 978385,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937248860.789, "dur": 1.531, + "args": { + "External id": 978386,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937248897.520, "dur": 5.454, + "args": { + "External id": 978387,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937248904.189, "dur": 4.581, + "args": { + "External id": 978388,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937248909.699, "dur": 1.801, + "args": { + "External id": 978389,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937248951.697, "dur": 2.153, + "args": { + "External id": 978390,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937248952.297, "dur": 1.365, + "args": { + "External id": 978391,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338708, "tid": 2379421, + "ts": 6345937248977.639, "dur": 220.606, + "args": { + "External id": 978392,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345937248983.585, "dur": 5.627, + "args": { + "External id": 978393,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937248987.184, "dur": 0.929, + "args": { + "External id": 978394,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937248992.552, "dur": 6.521, + "args": { + "External id": 978395,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937248997.593, "dur": 0.643, + "args": { + "External id": 978396,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345937249000.863, "dur": 3.910, + "args": { + "External id": 978397,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249003.879, "dur": 0.575, + "args": { + "External id": 978398,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937249005.518, "dur": 28.487, + "args": { + "External id": 978399,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249031.878, "dur": 0.884, + "args": { + "External id": 978400,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937249040.866, "dur": 4.830, + "args": { + "External id": 978401,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249044.805, "dur": 0.528, + "args": { + "External id": 978402,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937249046.636, "dur": 51.026, + "args": { + "External id": 978403,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937249052.734, "dur": 44.106, + "args": { + "External id": 978404,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937249099.713, "dur": 2.481, + "args": { + "External id": 978405,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249101.226, "dur": 0.612, + "args": { + "External id": 978406,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937249105.273, "dur": 4.002, + "args": { + "External id": 978407,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937249105.961, "dur": 3.203, + "args": { + "External id": 978408,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345937249110.853, "dur": 70.567, + "args": { + "External id": 978409,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 1000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937249185.991, "dur": 1.344, + "args": { + "External id": 978410,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 1001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937249188.049, "dur": 4.845, + "args": { + "External id": 978411,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 1002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249191.866, "dur": 0.539, + "args": { + "External id": 978412,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 1003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937249195.672, "dur": 1.052, + "args": { + "External id": 978413,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 1004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937249250.047, "dur": 11.082, + "args": { + "External id": 978414,"Record function id": 0, "Ev Idx": 1005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937249252.418, "dur": 7.961, + "args": { + "External id": 978415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937249255.306, "dur": 4.151, + "args": { + "External id": 978416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937249256.676, "dur": 2.681, + "args": { + "External id": 978417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249265.379, "dur": 12.755, + "args": { + "External id": 978418,"Record function id": 0, "Sequence number": 10552362, "Fwd thread id": 1, "Ev Idx": 1009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249266.572, "dur": 8.962, + "args": { + "External id": 978419,"Sequence number": 10552362, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1010 + } + }, + { + "ph": "f", "id": 104, "pid": 2338708, "tid": 2379421, "ts": 6345937249266.572, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937249268.670, "dur": 6.552, + "args": { + "External id": 978420,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937249273.993, "dur": 1.116, + "args": { + "External id": 978421,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249282.331, "dur": 163.851, + "args": { + "External id": 978422,"Record function id": 0, "Sequence number": 10552361, "Fwd thread id": 1, "Ev Idx": 1013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249283.240, "dur": 154.397, + "args": { + "External id": 978423,"Sequence number": 10552361, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1014 + } + }, + { + "ph": "f", "id": 105, "pid": 2338708, "tid": 2379421, "ts": 6345937249283.240, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937249287.277, "dur": 7.450, + "args": { + "External id": 978424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937249288.916, "dur": 5.181, + "args": { + "External id": 978425,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249290.092, "dur": 3.835, + "args": { + "External id": 978426,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937249297.900, "dur": 47.915, + "args": { + "External id": 978427,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937249347.325, "dur": 4.017, + "args": { + "External id": 978428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937249348.159, "dur": 2.372, + "args": { + "External id": 978429,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249349.235, "dur": 1.099, + "args": { + "External id": 978430,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937249352.817, "dur": 42.384, + "args": { + "External id": 978431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937249353.838, "dur": 40.797, + "args": { + "External id": 978432,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249394.069, "dur": 0.458, + "args": { + "External id": 978433,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937249395.938, "dur": 40.615, + "args": { + "External id": 978434,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249454.650, "dur": 6.551, + "args": { + "External id": 978435,"Record function id": 0, "Sequence number": 10552360, "Fwd thread id": 1, "Ev Idx": 1026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249455.609, "dur": 3.971, + "args": { + "External id": 978436,"Sequence number": 10552360, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1027 + } + }, + { + "ph": "f", "id": 106, "pid": 2338708, "tid": 2379421, "ts": 6345937249455.609, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937249457.155, "dur": 2.259, + "args": { + "External id": 978437,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937249457.709, "dur": 1.570, + "args": { + "External id": 978438,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249465.286, "dur": 9.548, + "args": { + "External id": 978439,"Record function id": 0, "Sequence number": 10552359, "Fwd thread id": 1, "Ev Idx": 1030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249466.283, "dur": 6.366, + "args": { + "External id": 978440,"Sequence number": 10552359, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1031 + } + }, + { + "ph": "f", "id": 107, "pid": 2338708, "tid": 2379421, "ts": 6345937249466.283, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937249467.225, "dur": 5.173, + "args": { + "External id": 978441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937249468.129, "dur": 3.766, + "args": { + "External id": 978442,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249471.234, "dur": 0.480, + "args": { + "External id": 978443,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937249481.225, "dur": 8.060, + "args": { + "External id": 978444,"Record function id": 0, "Ev Idx": 1035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937249482.681, "dur": 5.950, + "args": { + "External id": 978445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937249483.694, "dur": 4.694, + "args": { + "External id": 978446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937249484.444, "dur": 3.755, + "args": { + "External id": 978447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249492.877, "dur": 5.863, + "args": { + "External id": 978448,"Record function id": 0, "Sequence number": 10552358, "Fwd thread id": 1, "Ev Idx": 1039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249493.972, "dur": 2.618, + "args": { + "External id": 978449,"Sequence number": 10552358, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1040 + } + }, + { + "ph": "f", "id": 108, "pid": 2338708, "tid": 2379421, "ts": 6345937249493.972, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937249494.908, "dur": 1.500, + "args": { + "External id": 978450,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937249495.409, "dur": 0.830, + "args": { + "External id": 978451,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249502.442, "dur": 106.964, + "args": { + "External id": 978452,"Record function id": 0, "Sequence number": 10552357, "Fwd thread id": 1, "Ev Idx": 1043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249503.473, "dur": 97.337, + "args": { + "External id": 978453,"Sequence number": 10552357, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1044 + } + }, + { + "ph": "f", "id": 109, "pid": 2338708, "tid": 2379421, "ts": 6345937249503.473, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937249508.041, "dur": 2.636, + "args": { + "External id": 978454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937249508.521, "dur": 1.681, + "args": { + "External id": 978455,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249509.255, "dur": 0.771, + "args": { + "External id": 978456,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937249511.294, "dur": 40.211, + "args": { + "External id": 978457,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937249552.865, "dur": 7.370, + "args": { + "External id": 978458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937249553.705, "dur": 5.959, + "args": { + "External id": 978459,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249558.761, "dur": 0.777, + "args": { + "External id": 978460,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937249561.500, "dur": 2.509, + "args": { + "External id": 978461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937249562.468, "dur": 1.121, + "args": { + "External id": 978462,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249563.114, "dur": 0.398, + "args": { + "External id": 978463,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937249564.630, "dur": 35.314, + "args": { + "External id": 978464,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249614.859, "dur": 42.430, + "args": { + "External id": 978465,"Record function id": 0, "Sequence number": 10552356, "Fwd thread id": 1, "Ev Idx": 1056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249615.789, "dur": 8.551, + "args": { + "External id": 978466,"Sequence number": 10552356, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1057 + } + }, + { + "ph": "f", "id": 110, "pid": 2338708, "tid": 2379421, "ts": 6345937249615.789, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937249617.367, "dur": 6.798, + "args": { + "External id": 978467,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937249622.630, "dur": 1.368, + "args": { + "External id": 978468,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345937249627.874, "dur": 26.267, + "args": { + "External id": 978469,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249662.176, "dur": 7.776, + "args": { + "External id": 978470,"Record function id": 0, "Sequence number": 10552355, "Fwd thread id": 1, "Ev Idx": 1061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937249663.173, "dur": 4.468, + "args": { + "External id": 978471,"Sequence number": 10552355, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1062 + } + }, + { + "ph": "f", "id": 111, "pid": 2338708, "tid": 2379421, "ts": 6345937249663.173, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937249664.041, "dur": 3.339, + "args": { + "External id": 978472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937249665.056, "dur": 1.747, + "args": { + "External id": 978473,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937249665.914, "dur": 0.769, + "args": { + "External id": 978474,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937249674.621, "dur": 7.681, + "args": { + "External id": 978475,"Record function id": 0, "Ev Idx": 1066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937249676.017, "dur": 5.718, + "args": { + "External id": 978476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937249677.014, "dur": 4.427, + "args": { + "External id": 978477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937249680.234, "dur": 1.063, + "args": { + "External id": 978478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937249686.855, "dur": 546.734, + "args": { + "External id": 978479,"Record function id": 0, "Sequence number": 10552354, "Fwd thread id": 1, "Ev Idx": 1070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937249688.169, "dur": 504.561, + "args": { + "External id": 978480,"Sequence number": 10552354, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 1071 + } + }, + { + "ph": "f", "id": 112, "pid": 2338708, "tid": 2379421, "ts": 6345937249688.169, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2379421, + "ts": 6345937249713.912, "dur": 41.210, + "args": { + "External id": 978481,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345937249715.640, "dur": 39.223, + "args": { + "External id": 978482,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937249721.109, "dur": 6.896, + "args": { + "External id": 978483,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 1074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937249723.844, "dur": 3.496, + "args": { + "External id": 978484,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937249729.580, "dur": 24.673, + "args": { + "External id": 978485,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937249767.795, "dur": 2.054, + "args": { + "External id": 978486,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937249768.456, "dur": 1.253, + "args": { + "External id": 978487,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937249775.189, "dur": 3.936, + "args": { + "External id": 978488,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937249778.190, "dur": 0.801, + "args": { + "External id": 978489,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937249792.889, "dur": 2.886, + "args": { + "External id": 978490,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937249809.929, "dur": 2.736, + "args": { + "External id": 978491,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937249991.445, "dur": 4.768, + "args": { + "External id": 978492,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937250000.654, "dur": 102.531, + "args": { + "External id": 978493,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937250035.468, "dur": 1.688, + "args": { + "External id": 978494,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937250112.601, "dur": 39.860, + "args": { + "External id": 978495,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937250116.815, "dur": 35.415, + "args": { + "External id": 978496,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937250121.789, "dur": 7.896, + "args": { + "External id": 978497,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937250131.504, "dur": 20.193, + "args": { + "External id": 978498,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345937250157.868, "dur": 3.088, + "args": { + "External id": 978499,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937250158.849, "dur": 1.972, + "args": { + "External id": 978500,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937250169.270, "dur": 7.579, + "args": { + "External id": 978501,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937250175.463, "dur": 1.262, + "args": { + "External id": 978502,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937250179.309, "dur": 1.925, + "args": { + "External id": 978503,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937250180.000, "dur": 1.065, + "args": { + "External id": 978504,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937250211.703, "dur": 20.152, + "args": { + "External id": 978505,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937250249.170, "dur": 10.762, + "args": { + "External id": 978506,"Record function id": 0, "Ev Idx": 1097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937250251.655, "dur": 7.260, + "args": { + "External id": 978507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937250254.396, "dur": 3.434, + "args": { + "External id": 978508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937250255.737, "dur": 1.977, + "args": { + "External id": 978509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937250264.446, "dur": 13.099, + "args": { + "External id": 978510,"Record function id": 0, "Sequence number": 10552353, "Fwd thread id": 1, "Ev Idx": 1101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937250265.823, "dur": 8.249, + "args": { + "External id": 978511,"Sequence number": 10552353, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1102 + } + }, + { + "ph": "f", "id": 113, "pid": 2338708, "tid": 2379421, "ts": 6345937250265.823, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937250267.613, "dur": 6.221, + "args": { + "External id": 978512,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937250272.120, "dur": 1.586, + "args": { + "External id": 978513,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937250281.763, "dur": 204.393, + "args": { + "External id": 978514,"Record function id": 0, "Sequence number": 10552352, "Fwd thread id": 1, "Ev Idx": 1105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937250282.790, "dur": 195.481, + "args": { + "External id": 978515,"Sequence number": 10552352, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1106 + } + }, + { + "ph": "f", "id": 114, "pid": 2338708, "tid": 2379421, "ts": 6345937250282.790, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937250286.760, "dur": 5.765, + "args": { + "External id": 978516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937250288.588, "dur": 3.281, + "args": { + "External id": 978517,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937250290.393, "dur": 1.223, + "args": { + "External id": 978518,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937250296.655, "dur": 93.190, + "args": { + "External id": 978519,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937250391.451, "dur": 4.665, + "args": { + "External id": 978520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937250392.377, "dur": 2.835, + "args": { + "External id": 978521,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937250393.921, "dur": 1.112, + "args": { + "External id": 978522,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937250397.763, "dur": 18.854, + "args": { + "External id": 978523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937250407.714, "dur": 8.323, + "args": { + "External id": 978524,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937250412.596, "dur": 3.286, + "args": { + "External id": 978525,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937250417.312, "dur": 60.018, + "args": { + "External id": 978526,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937250494.960, "dur": 7.510, + "args": { + "External id": 978527,"Record function id": 0, "Sequence number": 10552351, "Fwd thread id": 1, "Ev Idx": 1118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937250496.001, "dur": 4.014, + "args": { + "External id": 978528,"Sequence number": 10552351, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1119 + } + }, + { + "ph": "f", "id": 115, "pid": 2338708, "tid": 2379421, "ts": 6345937250496.001, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937250497.599, "dur": 2.245, + "args": { + "External id": 978529,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937250498.377, "dur": 1.344, + "args": { + "External id": 978530,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937250506.751, "dur": 12.254, + "args": { + "External id": 978531,"Record function id": 0, "Sequence number": 10552350, "Fwd thread id": 1, "Ev Idx": 1122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937250507.851, "dur": 9.244, + "args": { + "External id": 978532,"Sequence number": 10552350, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1123 + } + }, + { + "ph": "f", "id": 116, "pid": 2338708, "tid": 2379421, "ts": 6345937250507.851, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937250509.281, "dur": 7.543, + "args": { + "External id": 978533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937250510.410, "dur": 5.875, + "args": { + "External id": 978534,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937250515.700, "dur": 0.464, + "args": { + "External id": 978535,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937250523.588, "dur": 6.277, + "args": { + "External id": 978536,"Record function id": 0, "Ev Idx": 1127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937250525.101, "dur": 4.149, + "args": { + "External id": 978537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937250526.325, "dur": 2.629, + "args": { + "External id": 978538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937250527.279, "dur": 1.565, + "args": { + "External id": 978539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937250533.511, "dur": 7.286, + "args": { + "External id": 978540,"Record function id": 0, "Sequence number": 10552349, "Fwd thread id": 1, "Ev Idx": 1131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937250534.641, "dur": 3.844, + "args": { + "External id": 978541,"Sequence number": 10552349, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1132 + } + }, + { + "ph": "f", "id": 117, "pid": 2338708, "tid": 2379421, "ts": 6345937250534.641, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937250536.040, "dur": 2.275, + "args": { + "External id": 978542,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937250536.786, "dur": 1.349, + "args": { + "External id": 978543,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937250545.361, "dur": 447.406, + "args": { + "External id": 978544,"Record function id": 0, "Sequence number": 10552348, "Fwd thread id": 1, "Ev Idx": 1135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937250546.565, "dur": 425.175, + "args": { + "External id": 978545,"Sequence number": 10552348, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1136 + } + }, + { + "ph": "f", "id": 118, "pid": 2338708, "tid": 2379421, "ts": 6345937250546.565, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937250563.953, "dur": 11.061, + "args": { + "External id": 978546,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937250570.555, "dur": 3.942, + "args": { + "External id": 978547,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937250577.281, "dur": 3.304, + "args": { + "External id": 978548,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937250578.227, "dur": 2.131, + "args": { + "External id": 978549,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937250585.500, "dur": 9.894, + "args": { + "External id": 978550,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937250588.896, "dur": 6.237, + "args": { + "External id": 978551,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937250624.662, "dur": 318.059, + "args": { + "External id": 978552,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937250714.521, "dur": 6.145, + "args": { + "External id": 978553,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937250722.484, "dur": 2.835, + "args": { + "External id": 978554,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937250728.988, "dur": 2.047, + "args": { + "External id": 978555,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937250731.975, "dur": 3.663, + "args": { + "External id": 978556,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937250826.787, "dur": 3.184, + "args": { + "External id": 978557,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937250827.969, "dur": 1.857, + "args": { + "External id": 978558,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937250834.429, "dur": 29.216, + "args": { + "External id": 978559,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937250840.176, "dur": 0.974, + "args": { + "External id": 978560,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937250866.977, "dur": 1.865, + "args": { + "External id": 978561,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937250868.070, "dur": 0.693, + "args": { + "External id": 978562,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937250872.248, "dur": 19.787, + "args": { + "External id": 978563,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937250876.112, "dur": 0.500, + "args": { + "External id": 978564,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937250957.065, "dur": 4.390, + "args": { + "External id": 978565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937250964.921, "dur": 0.792, + "args": { + "External id": 978566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937250968.068, "dur": 0.664, + "args": { + "External id": 978567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937251002.606, "dur": 354.691, + "args": { + "External id": 978568,"Record function id": 0, "Sequence number": 10552347, "Fwd thread id": 1, "Ev Idx": 1159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937251004.633, "dur": 343.013, + "args": { + "External id": 978569,"Sequence number": 10552347, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1160 + } + }, + { + "ph": "f", "id": 119, "pid": 2338708, "tid": 2379421, "ts": 6345937251004.633, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345937251050.580, "dur": 106.224, + "args": { + "External id": 978570,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937251054.209, "dur": 52.107, + "args": { + "External id": 978571,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937251109.643, "dur": 46.323, + "args": { + "External id": 978572,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 1163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937251170.561, "dur": 8.659, + "args": { + "External id": 978573,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937251175.761, "dur": 3.120, + "args": { + "External id": 978574,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937251368.895, "dur": 188.103, + "args": { + "External id": 978575,"Record function id": 0, "Sequence number": 10552346, "Fwd thread id": 1, "Ev Idx": 1166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937251370.855, "dur": 178.963, + "args": { + "External id": 978576,"Sequence number": 10552346, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1167 + } + }, + { + "ph": "f", "id": 120, "pid": 2338708, "tid": 2379421, "ts": 6345937251370.855, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345937251385.386, "dur": 48.683, + "args": { + "External id": 978577,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937251388.275, "dur": 3.523, + "args": { + "External id": 978578,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937251392.930, "dur": 40.686, + "args": { + "External id": 978579,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 1170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937251441.892, "dur": 9.017, + "args": { + "External id": 978580,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937251447.040, "dur": 3.600, + "args": { + "External id": 978581,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251564.612, "dur": 14.634, + "args": { + "External id": 978582,"Record function id": 0, "Sequence number": 10552345, "Fwd thread id": 1, "Ev Idx": 1173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251566.328, "dur": 10.123, + "args": { + "External id": 978583,"Sequence number": 10552345, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1174 + } + }, + { + "ph": "f", "id": 121, "pid": 2338708, "tid": 2379421, "ts": 6345937251566.328, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937251569.286, "dur": 6.772, + "args": { + "External id": 978584,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937251570.394, "dur": 5.468, + "args": { + "External id": 978585,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251583.573, "dur": 12.508, + "args": { + "External id": 978586,"Record function id": 0, "Sequence number": 10552344, "Fwd thread id": 1, "Ev Idx": 1177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251587.627, "dur": 6.647, + "args": { + "External id": 978587,"Sequence number": 10552344, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1178 + } + }, + { + "ph": "f", "id": 122, "pid": 2338708, "tid": 2379421, "ts": 6345937251587.627, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937251588.999, "dur": 5.055, + "args": { + "External id": 978588,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937251592.546, "dur": 1.360, + "args": { + "External id": 978589,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251599.780, "dur": 5.977, + "args": { + "External id": 978590,"Record function id": 0, "Sequence number": 10552343, "Fwd thread id": 1, "Ev Idx": 1181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251600.841, "dur": 3.249, + "args": { + "External id": 978591,"Sequence number": 10552343, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1182 + } + }, + { + "ph": "f", "id": 123, "pid": 2338708, "tid": 2379421, "ts": 6345937251600.841, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937251602.036, "dur": 1.878, + "args": { + "External id": 978592,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937251602.692, "dur": 1.072, + "args": { + "External id": 978593,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251609.854, "dur": 11.952, + "args": { + "External id": 978594,"Record function id": 0, "Sequence number": 10552342, "Fwd thread id": 1, "Ev Idx": 1185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251610.814, "dur": 8.745, + "args": { + "External id": 978595,"Sequence number": 10552342, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1186 + } + }, + { + "ph": "f", "id": 124, "pid": 2338708, "tid": 2379421, "ts": 6345937251610.814, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937251614.573, "dur": 4.810, + "args": { + "External id": 978596,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937251618.015, "dur": 1.200, + "args": { + "External id": 978597,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251628.227, "dur": 193.661, + "args": { + "External id": 978598,"Record function id": 0, "Sequence number": 10552341, "Fwd thread id": 1, "Ev Idx": 1189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251629.415, "dur": 183.846, + "args": { + "External id": 978599,"Sequence number": 10552341, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1190 + } + }, + { + "ph": "f", "id": 125, "pid": 2338708, "tid": 2379421, "ts": 6345937251629.415, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937251633.566, "dur": 10.510, + "args": { + "External id": 978600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937251636.226, "dur": 7.034, + "args": { + "External id": 978601,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937251638.466, "dur": 4.487, + "args": { + "External id": 978602,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937251646.097, "dur": 88.483, + "args": { + "External id": 978603,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937251736.209, "dur": 14.244, + "args": { + "External id": 978604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937251744.618, "dur": 4.931, + "args": { + "External id": 978605,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937251746.526, "dur": 2.782, + "args": { + "External id": 978606,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937251752.615, "dur": 5.486, + "args": { + "External id": 978607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937251753.930, "dur": 3.582, + "args": { + "External id": 978608,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937251756.998, "dur": 0.415, + "args": { + "External id": 978609,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937251758.802, "dur": 53.381, + "args": { + "External id": 978610,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251828.287, "dur": 10.983, + "args": { + "External id": 978611,"Record function id": 0, "Sequence number": 10552340, "Fwd thread id": 1, "Ev Idx": 1202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251829.561, "dur": 6.900, + "args": { + "External id": 978612,"Sequence number": 10552340, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1203 + } + }, + { + "ph": "f", "id": 126, "pid": 2338708, "tid": 2379421, "ts": 6345937251829.561, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937251831.230, "dur": 5.061, + "args": { + "External id": 978613,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937251834.610, "dur": 1.514, + "args": { + "External id": 978614,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251843.274, "dur": 11.288, + "args": { + "External id": 978615,"Record function id": 0, "Sequence number": 10552339, "Fwd thread id": 1, "Ev Idx": 1206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251844.248, "dur": 7.260, + "args": { + "External id": 978616,"Sequence number": 10552339, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1207 + } + }, + { + "ph": "f", "id": 127, "pid": 2338708, "tid": 2379421, "ts": 6345937251844.248, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937251845.571, "dur": 5.694, + "args": { + "External id": 978617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937251846.592, "dur": 4.129, + "args": { + "External id": 978618,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937251850.045, "dur": 0.521, + "args": { + "External id": 978619,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937251861.379, "dur": 17.442, + "args": { + "External id": 978620,"Record function id": 0, "Ev Idx": 1211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937251863.264, "dur": 14.578, + "args": { + "External id": 978621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937251866.470, "dur": 10.875, + "args": { + "External id": 978622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937251871.710, "dur": 5.458, + "args": { + "External id": 978623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251882.714, "dur": 6.992, + "args": { + "External id": 978624,"Record function id": 0, "Sequence number": 10552338, "Fwd thread id": 1, "Ev Idx": 1215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251883.996, "dur": 3.640, + "args": { + "External id": 978625,"Sequence number": 10552338, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1216 + } + }, + { + "ph": "f", "id": 128, "pid": 2338708, "tid": 2379421, "ts": 6345937251883.996, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937251885.326, "dur": 2.133, + "args": { + "External id": 978626,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937251886.174, "dur": 1.176, + "args": { + "External id": 978627,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251893.442, "dur": 106.826, + "args": { + "External id": 978628,"Record function id": 0, "Sequence number": 10552337, "Fwd thread id": 1, "Ev Idx": 1219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937251894.558, "dur": 98.024, + "args": { + "External id": 978629,"Sequence number": 10552337, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1220 + } + }, + { + "ph": "f", "id": 129, "pid": 2338708, "tid": 2379421, "ts": 6345937251894.558, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937251897.107, "dur": 5.773, + "args": { + "External id": 978630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937251898.246, "dur": 4.095, + "args": { + "External id": 978631,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937251901.695, "dur": 0.513, + "args": { + "External id": 978632,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937251903.575, "dur": 32.871, + "args": { + "External id": 978633,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937251937.731, "dur": 6.497, + "args": { + "External id": 978634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937251938.641, "dur": 4.962, + "args": { + "External id": 978635,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937251942.322, "dur": 1.139, + "args": { + "External id": 978636,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937251945.421, "dur": 5.634, + "args": { + "External id": 978637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937251949.102, "dur": 1.453, + "args": { + "External id": 978638,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937251949.979, "dur": 0.473, + "args": { + "External id": 978639,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937251951.820, "dur": 39.808, + "args": { + "External id": 978640,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252005.802, "dur": 109.109, + "args": { + "External id": 978641,"Record function id": 0, "Sequence number": 10552336, "Fwd thread id": 1, "Ev Idx": 1232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252007.110, "dur": 30.487, + "args": { + "External id": 978642,"Sequence number": 10552336, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1233 + } + }, + { + "ph": "f", "id": 130, "pid": 2338708, "tid": 2379421, "ts": 6345937252007.110, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937252031.746, "dur": 5.633, + "args": { + "External id": 978643,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937252034.490, "dur": 2.583, + "args": { + "External id": 978644,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345937252042.334, "dur": 68.736, + "args": { + "External id": 978645,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252122.844, "dur": 13.557, + "args": { + "External id": 978646,"Record function id": 0, "Sequence number": 10552335, "Fwd thread id": 1, "Ev Idx": 1237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252127.292, "dur": 6.670, + "args": { + "External id": 978647,"Sequence number": 10552335, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1238 + } + }, + { + "ph": "f", "id": 131, "pid": 2338708, "tid": 2379421, "ts": 6345937252127.292, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937252128.640, "dur": 5.051, + "args": { + "External id": 978648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937252129.937, "dur": 2.917, + "args": { + "External id": 978649,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937252131.880, "dur": 0.806, + "args": { + "External id": 978650,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937252141.396, "dur": 7.441, + "args": { + "External id": 978651,"Record function id": 0, "Ev Idx": 1242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937252142.910, "dur": 5.276, + "args": { + "External id": 978652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937252144.831, "dur": 2.995, + "args": { + "External id": 978653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937252145.760, "dur": 1.935, + "args": { + "External id": 978654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252152.836, "dur": 12.817, + "args": { + "External id": 978655,"Record function id": 0, "Sequence number": 10552334, "Fwd thread id": 1, "Ev Idx": 1246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252153.810, "dur": 9.079, + "args": { + "External id": 978656,"Sequence number": 10552334, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1247 + } + }, + { + "ph": "f", "id": 132, "pid": 2338708, "tid": 2379421, "ts": 6345937252153.810, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937252157.806, "dur": 4.891, + "args": { + "External id": 978657,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937252161.176, "dur": 1.349, + "args": { + "External id": 978658,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252169.589, "dur": 123.539, + "args": { + "External id": 978659,"Record function id": 0, "Sequence number": 10552333, "Fwd thread id": 1, "Ev Idx": 1250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252170.572, "dur": 111.960, + "args": { + "External id": 978660,"Sequence number": 10552333, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1251 + } + }, + { + "ph": "f", "id": 133, "pid": 2338708, "tid": 2379421, "ts": 6345937252170.572, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937252173.373, "dur": 2.842, + "args": { + "External id": 978661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937252173.984, "dur": 1.734, + "args": { + "External id": 978662,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937252174.929, "dur": 0.651, + "args": { + "External id": 978663,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937252179.701, "dur": 47.595, + "args": { + "External id": 978664,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937252228.381, "dur": 3.735, + "args": { + "External id": 978665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937252229.362, "dur": 2.087, + "args": { + "External id": 978666,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937252230.362, "dur": 0.948, + "args": { + "External id": 978667,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937252233.249, "dur": 7.805, + "args": { + "External id": 978668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937252234.332, "dur": 6.254, + "args": { + "External id": 978669,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937252239.759, "dur": 0.693, + "args": { + "External id": 978670,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937252241.791, "dur": 39.839, + "args": { + "External id": 978671,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252298.471, "dur": 29.904, + "args": { + "External id": 978672,"Record function id": 0, "Sequence number": 10552332, "Fwd thread id": 1, "Ev Idx": 1263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252299.728, "dur": 4.530, + "args": { + "External id": 978673,"Sequence number": 10552332, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1264 + } + }, + { + "ph": "f", "id": 134, "pid": 2338708, "tid": 2379421, "ts": 6345937252299.728, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937252301.050, "dur": 3.015, + "args": { + "External id": 978674,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937252301.980, "dur": 1.921, + "args": { + "External id": 978675,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937252307.420, "dur": 18.203, + "args": { + "External id": 978676,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252332.688, "dur": 14.176, + "args": { + "External id": 978677,"Record function id": 0, "Sequence number": 10552331, "Fwd thread id": 1, "Ev Idx": 1268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252334.224, "dur": 9.862, + "args": { + "External id": 978678,"Sequence number": 10552331, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1269 + } + }, + { + "ph": "f", "id": 135, "pid": 2338708, "tid": 2379421, "ts": 6345937252334.224, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937252335.432, "dur": 8.395, + "args": { + "External id": 978679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937252336.469, "dur": 6.701, + "args": { + "External id": 978680,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937252342.022, "dur": 1.003, + "args": { + "External id": 978681,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937252351.682, "dur": 5.953, + "args": { + "External id": 978682,"Record function id": 0, "Ev Idx": 1273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937252353.199, "dur": 3.855, + "args": { + "External id": 978683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937252354.359, "dur": 2.357, + "args": { + "External id": 978684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937252355.389, "dur": 1.190, + "args": { + "External id": 978685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937252364.983, "dur": 394.481, + "args": { + "External id": 978686,"Record function id": 0, "Sequence number": 10552330, "Fwd thread id": 1, "Ev Idx": 1277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937252366.591, "dur": 361.760, + "args": { + "External id": 978687,"Sequence number": 10552330, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1278 + } + }, + { + "ph": "f", "id": 136, "pid": 2338708, "tid": 2379421, "ts": 6345937252366.591, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937252404.515, "dur": 2.541, + "args": { + "External id": 978688,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937252405.230, "dur": 1.605, + "args": { + "External id": 978689,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937252426.371, "dur": 4.383, + "args": { + "External id": 978690,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937252440.986, "dur": 2.861, + "args": { + "External id": 978691,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937252611.333, "dur": 2.911, + "args": { + "External id": 978692,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937252618.415, "dur": 39.996, + "args": { + "External id": 978693,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937252632.865, "dur": 1.015, + "args": { + "External id": 978694,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937252665.061, "dur": 35.520, + "args": { + "External id": 978695,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937252666.980, "dur": 33.360, + "args": { + "External id": 978696,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937252673.796, "dur": 6.108, + "args": { + "External id": 978697,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937252681.632, "dur": 17.924, + "args": { + "External id": 978698,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345937252707.573, "dur": 2.939, + "args": { + "External id": 978699,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937252708.981, "dur": 1.406, + "args": { + "External id": 978700,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937252717.886, "dur": 2.238, + "args": { + "External id": 978701,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937252718.757, "dur": 1.204, + "args": { + "External id": 978702,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937252737.955, "dur": 16.815, + "args": { + "External id": 978703,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937252769.151, "dur": 13.639, + "args": { + "External id": 978704,"Record function id": 0, "Ev Idx": 1295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937252773.756, "dur": 8.100, + "args": { + "External id": 978705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937252775.644, "dur": 5.127, + "args": { + "External id": 978706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937252779.202, "dur": 1.457, + "args": { + "External id": 978707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252787.550, "dur": 6.589, + "args": { + "External id": 978708,"Record function id": 0, "Sequence number": 10552329, "Fwd thread id": 1, "Ev Idx": 1299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937252789.116, "dur": 1.464, + "args": { + "External id": 978709,"Sequence number": 10552329, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1300 + } + }, + { + "ph": "f", "id": 137, "pid": 2338708, "tid": 2379421, "ts": 6345937252789.116, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937252798.331, "dur": 544.654, + "args": { + "External id": 978710,"Record function id": 0, "Sequence number": 10552328, "Fwd thread id": 1, "Ev Idx": 1301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937252799.842, "dur": 528.397, + "args": { + "External id": 978711,"Sequence number": 10552328, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1302 + } + }, + { + "ph": "f", "id": 138, "pid": 2338708, "tid": 2379421, "ts": 6345937252799.842, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937252831.764, "dur": 9.595, + "args": { + "External id": 978712,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937252837.287, "dur": 3.773, + "args": { + "External id": 978713,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 1304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937252844.733, "dur": 9.051, + "args": { + "External id": 978714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937252848.504, "dur": 4.546, + "args": { + "External id": 978715,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937252852.026, "dur": 0.859, + "args": { + "External id": 978716,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2379421, + "ts": 6345937252857.825, "dur": 87.423, + "args": { + "External id": 978717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 1308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937252859.083, "dur": 2.734, + "args": { + "External id": 978718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 1309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937252859.972, "dur": 1.357, + "args": { + "External id": 978719,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937252860.769, "dur": 0.445, + "args": { + "External id": 978720,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2379421, + "ts": 6345937252865.226, "dur": 79.345, + "args": { + "External id": 978721,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 1312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937252866.584, "dur": 77.042, + "args": { + "External id": 978722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 1313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345937252950.068, "dur": 2.618, + "args": { + "External id": 978723,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 1314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937252950.970, "dur": 1.550, + "args": { + "External id": 978724,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 1315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937252988.598, "dur": 6.216, + "args": { + "External id": 978725,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937252998.450, "dur": 4.115, + "args": { + "External id": 978726,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937253003.478, "dur": 1.808, + "args": { + "External id": 978727,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937253111.598, "dur": 4.077, + "args": { + "External id": 978728,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937253112.496, "dur": 2.845, + "args": { + "External id": 978729,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338708, "tid": 2379421, + "ts": 6345937253141.297, "dur": 164.396, + "args": { + "External id": 978730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 1321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345937253147.421, "dur": 9.372, + "args": { + "External id": 978731,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253154.032, "dur": 1.697, + "args": { + "External id": 978732,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937253158.256, "dur": 8.830, + "args": { + "External id": 978733,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 1324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253165.381, "dur": 0.817, + "args": { + "External id": 978734,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 1325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345937253169.087, "dur": 1.861, + "args": { + "External id": 978735,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 1326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253169.819, "dur": 0.770, + "args": { + "External id": 978736,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937253174.246, "dur": 2.639, + "args": { + "External id": 978737,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253175.489, "dur": 0.802, + "args": { + "External id": 978738,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 1329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937253185.186, "dur": 4.665, + "args": { + "External id": 978739,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 1330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253186.459, "dur": 3.063, + "args": { + "External id": 978740,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 1331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937253190.928, "dur": 8.667, + "args": { + "External id": 978741,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 1332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937253196.962, "dur": 2.374, + "args": { + "External id": 978742,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 1333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937253200.374, "dur": 3.872, + "args": { + "External id": 978743,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 1334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253203.565, "dur": 0.311, + "args": { + "External id": 978744,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 1335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937253204.874, "dur": 3.952, + "args": { + "External id": 978745,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937253205.498, "dur": 3.200, + "args": { + "External id": 978746,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 1337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345937253210.282, "dur": 75.343, + "args": { + "External id": 978747,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 1338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937253290.777, "dur": 1.703, + "args": { + "External id": 978748,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 1339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345937253295.618, "dur": 4.858, + "args": { + "External id": 978749,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 1340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253299.221, "dur": 0.778, + "args": { + "External id": 978750,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 1341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937253303.233, "dur": 1.096, + "args": { + "External id": 978751,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 1342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937253358.913, "dur": 17.796, + "args": { + "External id": 978752,"Record function id": 0, "Ev Idx": 1343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937253361.428, "dur": 14.334, + "args": { + "External id": 978753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937253363.943, "dur": 10.809, + "args": { + "External id": 978754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937253369.573, "dur": 5.018, + "args": { + "External id": 978755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253381.266, "dur": 10.328, + "args": { + "External id": 978756,"Record function id": 0, "Sequence number": 10552327, "Fwd thread id": 1, "Ev Idx": 1347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253382.691, "dur": 6.495, + "args": { + "External id": 978757,"Sequence number": 10552327, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1348 + } + }, + { + "ph": "f", "id": 139, "pid": 2338708, "tid": 2379421, "ts": 6345937253382.691, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937253386.808, "dur": 2.041, + "args": { + "External id": 978758,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937253387.586, "dur": 1.109, + "args": { + "External id": 978759,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253395.654, "dur": 129.556, + "args": { + "External id": 978760,"Record function id": 0, "Sequence number": 10552326, "Fwd thread id": 1, "Ev Idx": 1351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253396.529, "dur": 120.505, + "args": { + "External id": 978761,"Sequence number": 10552326, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1352 + } + }, + { + "ph": "f", "id": 140, "pid": 2338708, "tid": 2379421, "ts": 6345937253396.529, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937253400.228, "dur": 7.045, + "args": { + "External id": 978762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937253401.741, "dur": 4.798, + "args": { + "External id": 978763,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253405.588, "dur": 0.780, + "args": { + "External id": 978764,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937253408.543, "dur": 51.744, + "args": { + "External id": 978765,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937253461.710, "dur": 6.221, + "args": { + "External id": 978766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937253462.795, "dur": 4.512, + "args": { + "External id": 978767,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253466.021, "dur": 1.150, + "args": { + "External id": 978768,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937253469.613, "dur": 5.353, + "args": { + "External id": 978769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937253470.602, "dur": 3.878, + "args": { + "External id": 978770,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253473.873, "dur": 0.455, + "args": { + "External id": 978771,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937253475.516, "dur": 40.516, + "args": { + "External id": 978772,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253530.869, "dur": 9.422, + "args": { + "External id": 978773,"Record function id": 0, "Sequence number": 10552325, "Fwd thread id": 1, "Ev Idx": 1364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253532.279, "dur": 6.278, + "args": { + "External id": 978774,"Sequence number": 10552325, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1365 + } + }, + { + "ph": "f", "id": 141, "pid": 2338708, "tid": 2379421, "ts": 6345937253532.279, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937253534.000, "dur": 4.373, + "args": { + "External id": 978775,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937253536.800, "dur": 1.396, + "args": { + "External id": 978776,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253544.347, "dur": 11.607, + "args": { + "External id": 978777,"Record function id": 0, "Sequence number": 10552324, "Fwd thread id": 1, "Ev Idx": 1368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253545.427, "dur": 8.488, + "args": { + "External id": 978778,"Sequence number": 10552324, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1369 + } + }, + { + "ph": "f", "id": 142, "pid": 2338708, "tid": 2379421, "ts": 6345937253545.427, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937253546.307, "dur": 7.366, + "args": { + "External id": 978779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937253546.983, "dur": 6.161, + "args": { + "External id": 978780,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253550.196, "dur": 2.838, + "args": { + "External id": 978781,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937253560.542, "dur": 5.140, + "args": { + "External id": 978782,"Record function id": 0, "Ev Idx": 1373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937253561.827, "dur": 3.288, + "args": { + "External id": 978783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937253562.840, "dur": 1.927, + "args": { + "External id": 978784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937253563.551, "dur": 1.089, + "args": { + "External id": 978785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253569.552, "dur": 7.966, + "args": { + "External id": 978786,"Record function id": 0, "Sequence number": 10552323, "Fwd thread id": 1, "Ev Idx": 1377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253570.367, "dur": 4.963, + "args": { + "External id": 978787,"Sequence number": 10552323, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 1378 + } + }, + { + "ph": "f", "id": 143, "pid": 2338708, "tid": 2379421, "ts": 6345937253570.367, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937253573.620, "dur": 1.529, + "args": { + "External id": 978788,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937253574.316, "dur": 0.670, + "args": { + "External id": 978789,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 1380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253581.375, "dur": 108.059, + "args": { + "External id": 978790,"Record function id": 0, "Sequence number": 10552322, "Fwd thread id": 1, "Ev Idx": 1381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253584.747, "dur": 95.975, + "args": { + "External id": 978791,"Sequence number": 10552322, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1382 + } + }, + { + "ph": "f", "id": 144, "pid": 2338708, "tid": 2379421, "ts": 6345937253584.747, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937253587.020, "dur": 2.597, + "args": { + "External id": 978792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 1383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937253587.588, "dur": 1.533, + "args": { + "External id": 978793,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 1384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253588.396, "dur": 0.545, + "args": { + "External id": 978794,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 1385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937253590.220, "dur": 38.348, + "args": { + "External id": 978795,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 1386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937253629.747, "dur": 4.988, + "args": { + "External id": 978796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937253630.333, "dur": 3.882, + "args": { + "External id": 978797,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 1388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253633.187, "dur": 0.907, + "args": { + "External id": 978798,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 1389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937253636.096, "dur": 4.915, + "args": { + "External id": 978799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937253636.989, "dur": 3.571, + "args": { + "External id": 978800,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253640.071, "dur": 0.403, + "args": { + "External id": 978801,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937253641.706, "dur": 38.110, + "args": { + "External id": 978802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 1393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253697.114, "dur": 40.837, + "args": { + "External id": 978803,"Record function id": 0, "Sequence number": 10552321, "Fwd thread id": 1, "Ev Idx": 1394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253698.399, "dur": 8.528, + "args": { + "External id": 978804,"Sequence number": 10552321, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1395 + } + }, + { + "ph": "f", "id": 145, "pid": 2338708, "tid": 2379421, "ts": 6345937253698.399, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937253702.222, "dur": 4.517, + "args": { + "External id": 978805,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937253703.005, "dur": 3.564, + "args": { + "External id": 978806,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345937253710.265, "dur": 24.225, + "args": { + "External id": 978807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253742.715, "dur": 11.386, + "args": { + "External id": 978808,"Record function id": 0, "Sequence number": 10552320, "Fwd thread id": 1, "Ev Idx": 1399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937253744.193, "dur": 7.531, + "args": { + "External id": 978809,"Sequence number": 10552320, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1400 + } + }, + { + "ph": "f", "id": 146, "pid": 2338708, "tid": 2379421, "ts": 6345937253744.193, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937253745.573, "dur": 5.868, + "args": { + "External id": 978810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 1401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937253746.682, "dur": 4.202, + "args": { + "External id": 978811,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 1402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937253750.021, "dur": 0.761, + "args": { + "External id": 978812,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 1403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937253758.707, "dur": 8.084, + "args": { + "External id": 978813,"Record function id": 0, "Ev Idx": 1404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937253760.244, "dur": 5.998, + "args": { + "External id": 978814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937253761.293, "dur": 4.622, + "args": { + "External id": 978815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937253764.456, "dur": 1.299, + "args": { + "External id": 978816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937253771.323, "dur": 568.240, + "args": { + "External id": 978817,"Record function id": 0, "Sequence number": 10552319, "Fwd thread id": 1, "Ev Idx": 1408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937253772.891, "dur": 523.671, + "args": { + "External id": 978818,"Sequence number": 10552319, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 1409 + } + }, + { + "ph": "f", "id": 147, "pid": 2338708, "tid": 2379421, "ts": 6345937253772.891, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2379421, + "ts": 6345937253798.257, "dur": 39.337, + "args": { + "External id": 978819,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345937253800.114, "dur": 37.260, + "args": { + "External id": 978820,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937253803.347, "dur": 6.401, + "args": { + "External id": 978821,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 1412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937253805.791, "dur": 3.300, + "args": { + "External id": 978822,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937253811.207, "dur": 25.461, + "args": { + "External id": 978823,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937253851.894, "dur": 3.033, + "args": { + "External id": 978824,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937253853.058, "dur": 1.672, + "args": { + "External id": 978825,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937253862.125, "dur": 1.620, + "args": { + "External id": 978826,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937253862.728, "dur": 0.898, + "args": { + "External id": 978827,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937253877.518, "dur": 2.792, + "args": { + "External id": 978828,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937253892.287, "dur": 4.081, + "args": { + "External id": 978829,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937254158.662, "dur": 4.377, + "args": { + "External id": 978830,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937254169.872, "dur": 42.562, + "args": { + "External id": 978831,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937254183.687, "dur": 1.133, + "args": { + "External id": 978832,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937254219.442, "dur": 36.320, + "args": { + "External id": 978833,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937254221.647, "dur": 33.808, + "args": { + "External id": 978834,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937254229.018, "dur": 4.781, + "args": { + "External id": 978835,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937254235.429, "dur": 19.372, + "args": { + "External id": 978836,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345937254261.449, "dur": 2.497, + "args": { + "External id": 978837,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937254262.477, "dur": 1.332, + "args": { + "External id": 978838,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937254274.959, "dur": 1.965, + "args": { + "External id": 978839,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937254275.502, "dur": 1.288, + "args": { + "External id": 978840,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937254279.203, "dur": 5.101, + "args": { + "External id": 978841,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937254283.023, "dur": 1.175, + "args": { + "External id": 978842,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937254317.420, "dur": 20.119, + "args": { + "External id": 978843,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937254355.394, "dur": 11.349, + "args": { + "External id": 978844,"Record function id": 0, "Ev Idx": 1435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937254358.035, "dur": 7.626, + "args": { + "External id": 978845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937254361.081, "dur": 3.575, + "args": { + "External id": 978846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937254362.489, "dur": 1.992, + "args": { + "External id": 978847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937254370.861, "dur": 11.301, + "args": { + "External id": 978848,"Record function id": 0, "Sequence number": 10552318, "Fwd thread id": 1, "Ev Idx": 1439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937254372.055, "dur": 7.536, + "args": { + "External id": 978849,"Sequence number": 10552318, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1440 + } + }, + { + "ph": "f", "id": 148, "pid": 2338708, "tid": 2379421, "ts": 6345937254372.055, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937254377.178, "dur": 2.134, + "args": { + "External id": 978850,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937254377.816, "dur": 1.310, + "args": { + "External id": 978851,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937254386.285, "dur": 164.852, + "args": { + "External id": 978852,"Record function id": 0, "Sequence number": 10552317, "Fwd thread id": 1, "Ev Idx": 1443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937254387.531, "dur": 155.946, + "args": { + "External id": 978853,"Sequence number": 10552317, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1444 + } + }, + { + "ph": "f", "id": 149, "pid": 2338708, "tid": 2379421, "ts": 6345937254387.531, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937254390.907, "dur": 7.519, + "args": { + "External id": 978854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937254392.606, "dur": 5.154, + "args": { + "External id": 978855,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937254396.581, "dur": 0.946, + "args": { + "External id": 978856,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937254399.385, "dur": 79.221, + "args": { + "External id": 978857,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937254479.836, "dur": 9.475, + "args": { + "External id": 978858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937254480.913, "dur": 7.763, + "args": { + "External id": 978859,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937254484.797, "dur": 3.648, + "args": { + "External id": 978860,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937254490.807, "dur": 5.316, + "args": { + "External id": 978861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937254491.827, "dur": 3.697, + "args": { + "External id": 978862,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937254494.926, "dur": 0.462, + "args": { + "External id": 978863,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937254496.897, "dur": 45.725, + "args": { + "External id": 978864,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937254556.999, "dur": 10.311, + "args": { + "External id": 978865,"Record function id": 0, "Sequence number": 10552316, "Fwd thread id": 1, "Ev Idx": 1456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937254558.059, "dur": 6.831, + "args": { + "External id": 978866,"Sequence number": 10552316, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1457 + } + }, + { + "ph": "f", "id": 150, "pid": 2338708, "tid": 2379421, "ts": 6345937254558.059, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937254559.746, "dur": 4.966, + "args": { + "External id": 978867,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937254563.149, "dur": 1.393, + "args": { + "External id": 978868,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937254574.123, "dur": 10.920, + "args": { + "External id": 978869,"Record function id": 0, "Sequence number": 10552315, "Fwd thread id": 1, "Ev Idx": 1460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937254575.245, "dur": 7.299, + "args": { + "External id": 978870,"Sequence number": 10552315, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1461 + } + }, + { + "ph": "f", "id": 151, "pid": 2338708, "tid": 2379421, "ts": 6345937254575.245, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937254576.505, "dur": 5.818, + "args": { + "External id": 978871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937254579.796, "dur": 2.015, + "args": { + "External id": 978872,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937254581.003, "dur": 0.672, + "args": { + "External id": 978873,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937254589.407, "dur": 5.206, + "args": { + "External id": 978874,"Record function id": 0, "Ev Idx": 1465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937254590.762, "dur": 3.303, + "args": { + "External id": 978875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937254591.942, "dur": 1.886, + "args": { + "External id": 978876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937254592.507, "dur": 1.184, + "args": { + "External id": 978877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937254598.398, "dur": 6.849, + "args": { + "External id": 978878,"Record function id": 0, "Sequence number": 10552314, "Fwd thread id": 1, "Ev Idx": 1469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937254599.341, "dur": 3.876, + "args": { + "External id": 978879,"Sequence number": 10552314, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1470 + } + }, + { + "ph": "f", "id": 152, "pid": 2338708, "tid": 2379421, "ts": 6345937254599.341, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937254600.400, "dur": 2.636, + "args": { + "External id": 978880,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937254601.209, "dur": 1.654, + "args": { + "External id": 978881,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937254612.910, "dur": 393.474, + "args": { + "External id": 978882,"Record function id": 0, "Sequence number": 10552313, "Fwd thread id": 1, "Ev Idx": 1473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937254614.334, "dur": 371.913, + "args": { + "External id": 978883,"Sequence number": 10552313, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1474 + } + }, + { + "ph": "f", "id": 153, "pid": 2338708, "tid": 2379421, "ts": 6345937254614.334, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937254631.276, "dur": 6.499, + "args": { + "External id": 978884,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937254633.575, "dur": 3.660, + "args": { + "External id": 978885,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937254639.838, "dur": 8.496, + "args": { + "External id": 978886,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937254643.350, "dur": 4.578, + "args": { + "External id": 978887,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937254649.758, "dur": 3.320, + "args": { + "External id": 978888,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937254650.674, "dur": 2.194, + "args": { + "External id": 978889,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937254685.003, "dur": 272.744, + "args": { + "External id": 978890,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937254778.894, "dur": 5.504, + "args": { + "External id": 978891,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937254786.187, "dur": 4.719, + "args": { + "External id": 978892,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937254791.999, "dur": 1.793, + "args": { + "External id": 978893,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937254794.672, "dur": 1.777, + "args": { + "External id": 978894,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937254845.217, "dur": 5.611, + "args": { + "External id": 978895,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937254848.669, "dur": 1.993, + "args": { + "External id": 978896,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937254855.307, "dur": 30.371, + "args": { + "External id": 978897,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937254862.818, "dur": 0.933, + "args": { + "External id": 978898,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937254887.253, "dur": 1.366, + "args": { + "External id": 978899,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937254887.946, "dur": 0.591, + "args": { + "External id": 978900,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937254889.468, "dur": 19.488, + "args": { + "External id": 978901,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937254891.031, "dur": 3.510, + "args": { + "External id": 978902,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937254971.871, "dur": 4.001, + "args": { + "External id": 978903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937254979.080, "dur": 0.967, + "args": { + "External id": 978904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345937254982.144, "dur": 0.983, + "args": { + "External id": 978905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937255039.104, "dur": 314.915, + "args": { + "External id": 978906,"Record function id": 0, "Sequence number": 10552312, "Fwd thread id": 1, "Ev Idx": 1497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937255041.238, "dur": 303.219, + "args": { + "External id": 978907,"Sequence number": 10552312, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1498 + } + }, + { + "ph": "f", "id": 154, "pid": 2338708, "tid": 2379421, "ts": 6345937255041.238, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345937255104.737, "dur": 57.067, + "args": { + "External id": 978908,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937255111.085, "dur": 5.835, + "args": { + "External id": 978909,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937255118.771, "dur": 42.103, + "args": { + "External id": 978910,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 1501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937255173.618, "dur": 4.745, + "args": { + "External id": 978911,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 1502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937255175.132, "dur": 2.833, + "args": { + "External id": 978912,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937255365.452, "dur": 192.362, + "args": { + "External id": 978913,"Record function id": 0, "Sequence number": 10552311, "Fwd thread id": 1, "Ev Idx": 1504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937255367.679, "dur": 182.238, + "args": { + "External id": 978914,"Sequence number": 10552311, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1505 + } + }, + { + "ph": "f", "id": 155, "pid": 2338708, "tid": 2379421, "ts": 6345937255367.679, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345937255387.649, "dur": 44.784, + "args": { + "External id": 978915,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937255391.647, "dur": 2.983, + "args": { + "External id": 978916,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937255395.754, "dur": 36.157, + "args": { + "External id": 978917,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 1508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345937255441.259, "dur": 6.973, + "args": { + "External id": 978918,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 1509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937255442.737, "dur": 5.180, + "args": { + "External id": 978919,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255565.725, "dur": 22.455, + "args": { + "External id": 978920,"Record function id": 0, "Sequence number": 10552310, "Fwd thread id": 1, "Ev Idx": 1511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255567.231, "dur": 17.410, + "args": { + "External id": 978921,"Sequence number": 10552310, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1512 + } + }, + { + "ph": "f", "id": 156, "pid": 2338708, "tid": 2379421, "ts": 6345937255567.231, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937255570.144, "dur": 14.152, + "args": { + "External id": 978922,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937255575.929, "dur": 8.193, + "args": { + "External id": 978923,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255594.543, "dur": 6.347, + "args": { + "External id": 978924,"Record function id": 0, "Sequence number": 10552309, "Fwd thread id": 1, "Ev Idx": 1515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255595.680, "dur": 3.443, + "args": { + "External id": 978925,"Sequence number": 10552309, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 1516 + } + }, + { + "ph": "f", "id": 157, "pid": 2338708, "tid": 2379421, "ts": 6345937255595.680, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937255596.908, "dur": 2.038, + "args": { + "External id": 978926,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937255597.797, "dur": 0.948, + "args": { + "External id": 978927,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 1518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255604.576, "dur": 9.353, + "args": { + "External id": 978928,"Record function id": 0, "Sequence number": 10552308, "Fwd thread id": 1, "Ev Idx": 1519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255605.782, "dur": 5.511, + "args": { + "External id": 978929,"Sequence number": 10552308, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 1520 + } + }, + { + "ph": "f", "id": 158, "pid": 2338708, "tid": 2379421, "ts": 6345937255605.782, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937255607.135, "dur": 3.974, + "args": { + "External id": 978930,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937255609.787, "dur": 1.214, + "args": { + "External id": 978931,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255617.706, "dur": 9.109, + "args": { + "External id": 978932,"Record function id": 0, "Sequence number": 10552307, "Fwd thread id": 1, "Ev Idx": 1523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255618.795, "dur": 5.499, + "args": { + "External id": 978933,"Sequence number": 10552307, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1524 + } + }, + { + "ph": "f", "id": 159, "pid": 2338708, "tid": 2379421, "ts": 6345937255618.795, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937255620.100, "dur": 4.036, + "args": { + "External id": 978934,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937255622.999, "dur": 0.968, + "args": { + "External id": 978935,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255631.063, "dur": 201.671, + "args": { + "External id": 978936,"Record function id": 0, "Sequence number": 10552306, "Fwd thread id": 1, "Ev Idx": 1527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255631.986, "dur": 192.204, + "args": { + "External id": 978937,"Sequence number": 10552306, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1528 + } + }, + { + "ph": "f", "id": 160, "pid": 2338708, "tid": 2379421, "ts": 6345937255631.986, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937255636.168, "dur": 6.604, + "args": { + "External id": 978938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937255638.350, "dur": 3.669, + "args": { + "External id": 978939,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937255640.341, "dur": 1.325, + "args": { + "External id": 978940,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937255646.881, "dur": 77.074, + "args": { + "External id": 978941,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937255725.751, "dur": 6.182, + "args": { + "External id": 978942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937255726.863, "dur": 4.304, + "args": { + "External id": 978943,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937255729.782, "dur": 1.192, + "args": { + "External id": 978944,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937255763.006, "dur": 5.317, + "args": { + "External id": 978945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937255764.093, "dur": 3.700, + "args": { + "External id": 978946,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937255767.268, "dur": 0.400, + "args": { + "External id": 978947,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937255769.069, "dur": 54.078, + "args": { + "External id": 978948,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255839.258, "dur": 9.649, + "args": { + "External id": 978949,"Record function id": 0, "Sequence number": 10552305, "Fwd thread id": 1, "Ev Idx": 1540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255840.446, "dur": 6.326, + "args": { + "External id": 978950,"Sequence number": 10552305, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1541 + } + }, + { + "ph": "f", "id": 161, "pid": 2338708, "tid": 2379421, "ts": 6345937255840.446, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937255842.040, "dur": 4.551, + "args": { + "External id": 978951,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937255845.116, "dur": 1.313, + "args": { + "External id": 978952,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255852.860, "dur": 10.405, + "args": { + "External id": 978953,"Record function id": 0, "Sequence number": 10552304, "Fwd thread id": 1, "Ev Idx": 1544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255853.907, "dur": 6.666, + "args": { + "External id": 978954,"Sequence number": 10552304, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1545 + } + }, + { + "ph": "f", "id": 162, "pid": 2338708, "tid": 2379421, "ts": 6345937255853.907, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937255854.887, "dur": 5.433, + "args": { + "External id": 978955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937255858.109, "dur": 1.645, + "args": { + "External id": 978956,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937255858.942, "dur": 0.680, + "args": { + "External id": 978957,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937255869.777, "dur": 10.377, + "args": { + "External id": 978958,"Record function id": 0, "Ev Idx": 1549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937255871.327, "dur": 7.922, + "args": { + "External id": 978959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937255874.426, "dur": 4.367, + "args": { + "External id": 978960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937255875.686, "dur": 2.952, + "args": { + "External id": 978961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255883.883, "dur": 7.674, + "args": { + "External id": 978962,"Record function id": 0, "Sequence number": 10552303, "Fwd thread id": 1, "Ev Idx": 1553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255884.792, "dur": 4.595, + "args": { + "External id": 978963,"Sequence number": 10552303, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 1554 + } + }, + { + "ph": "f", "id": 163, "pid": 2338708, "tid": 2379421, "ts": 6345937255884.792, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937255887.660, "dur": 1.548, + "args": { + "External id": 978964,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937255888.114, "dur": 0.938, + "args": { + "External id": 978965,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 1556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255897.687, "dur": 108.761, + "args": { + "External id": 978966,"Record function id": 0, "Sequence number": 10552302, "Fwd thread id": 1, "Ev Idx": 1557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937255898.753, "dur": 100.189, + "args": { + "External id": 978967,"Sequence number": 10552302, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1558 + } + }, + { + "ph": "f", "id": 164, "pid": 2338708, "tid": 2379421, "ts": 6345937255898.753, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937255900.587, "dur": 5.007, + "args": { + "External id": 978968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 1559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937255901.111, "dur": 3.972, + "args": { + "External id": 978969,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 1560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937255904.565, "dur": 0.360, + "args": { + "External id": 978970,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 1561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937255906.240, "dur": 33.790, + "args": { + "External id": 978971,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 1562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937255943.067, "dur": 4.764, + "args": { + "External id": 978972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937255943.723, "dur": 3.464, + "args": { + "External id": 978973,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 1564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937255944.826, "dur": 2.181, + "args": { + "External id": 978974,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 1565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937255948.991, "dur": 7.607, + "args": { + "External id": 978975,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937255949.921, "dur": 6.211, + "args": { + "External id": 978976,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937255953.345, "dur": 2.656, + "args": { + "External id": 978977,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937255959.613, "dur": 38.351, + "args": { + "External id": 978978,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 1569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937256038.103, "dur": 82.731, + "args": { + "External id": 978979,"Record function id": 0, "Sequence number": 10552301, "Fwd thread id": 1, "Ev Idx": 1570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937256039.736, "dur": 5.525, + "args": { + "External id": 978980,"Sequence number": 10552301, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1571 + } + }, + { + "ph": "f", "id": 165, "pid": 2338708, "tid": 2379421, "ts": 6345937256039.736, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937256041.872, "dur": 3.199, + "args": { + "External id": 978981,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937256042.619, "dur": 2.186, + "args": { + "External id": 978982,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345937256049.370, "dur": 67.205, + "args": { + "External id": 978983,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937256130.529, "dur": 13.307, + "args": { + "External id": 978984,"Record function id": 0, "Sequence number": 10552300, "Fwd thread id": 1, "Ev Idx": 1575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937256132.150, "dur": 8.895, + "args": { + "External id": 978985,"Sequence number": 10552300, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1576 + } + }, + { + "ph": "f", "id": 166, "pid": 2338708, "tid": 2379421, "ts": 6345937256132.150, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937256133.521, "dur": 7.276, + "args": { + "External id": 978986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 1577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937256134.846, "dur": 4.992, + "args": { + "External id": 978987,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 1578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937256138.998, "dur": 0.706, + "args": { + "External id": 978988,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 1579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937256148.866, "dur": 9.866, + "args": { + "External id": 978989,"Record function id": 0, "Ev Idx": 1580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937256150.276, "dur": 7.635, + "args": { + "External id": 978990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937256152.192, "dur": 5.295, + "args": { + "External id": 978991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937256155.443, "dur": 1.919, + "args": { + "External id": 978992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937256162.264, "dur": 6.884, + "args": { + "External id": 978993,"Record function id": 0, "Sequence number": 10552299, "Fwd thread id": 1, "Ev Idx": 1584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937256163.808, "dur": 3.200, + "args": { + "External id": 978994,"Sequence number": 10552299, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1585 + } + }, + { + "ph": "f", "id": 167, "pid": 2338708, "tid": 2379421, "ts": 6345937256163.808, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937256164.995, "dur": 1.809, + "args": { + "External id": 978995,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937256165.536, "dur": 1.087, + "args": { + "External id": 978996,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937256172.716, "dur": 122.225, + "args": { + "External id": 978997,"Record function id": 0, "Sequence number": 10552298, "Fwd thread id": 1, "Ev Idx": 1588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937256173.666, "dur": 111.256, + "args": { + "External id": 978998,"Sequence number": 10552298, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1589 + } + }, + { + "ph": "f", "id": 168, "pid": 2338708, "tid": 2379421, "ts": 6345937256173.666, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937256176.329, "dur": 5.072, + "args": { + "External id": 978999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937256179.403, "dur": 1.528, + "args": { + "External id": 979000,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 1591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937256180.270, "dur": 0.526, + "args": { + "External id": 979001,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 1592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937256182.114, "dur": 48.778, + "args": { + "External id": 979002,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 1593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937256232.055, "dur": 6.218, + "args": { + "External id": 979003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937256232.697, "dur": 4.893, + "args": { + "External id": 979004,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937256236.350, "dur": 1.110, + "args": { + "External id": 979005,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937256241.575, "dur": 2.573, + "args": { + "External id": 979006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937256242.283, "dur": 1.352, + "args": { + "External id": 979007,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937256242.943, "dur": 0.550, + "args": { + "External id": 979008,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937256244.636, "dur": 39.287, + "args": { + "External id": 979009,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 1600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937256300.215, "dur": 34.492, + "args": { + "External id": 979010,"Record function id": 0, "Sequence number": 10552297, "Fwd thread id": 1, "Ev Idx": 1601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937256301.305, "dur": 7.884, + "args": { + "External id": 979011,"Sequence number": 10552297, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 1602 + } + }, + { + "ph": "f", "id": 169, "pid": 2338708, "tid": 2379421, "ts": 6345937256301.305, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937256304.962, "dur": 4.058, + "args": { + "External id": 979012,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937256307.350, "dur": 1.516, + "args": { + "External id": 979013,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937256312.022, "dur": 19.522, + "args": { + "External id": 979014,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937256338.944, "dur": 7.248, + "args": { + "External id": 979015,"Record function id": 0, "Sequence number": 10552296, "Fwd thread id": 1, "Ev Idx": 1606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345937256339.962, "dur": 4.410, + "args": { + "External id": 979016,"Sequence number": 10552296, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1607 + } + }, + { + "ph": "f", "id": 170, "pid": 2338708, "tid": 2379421, "ts": 6345937256339.962, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345937256340.829, "dur": 3.316, + "args": { + "External id": 979017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 1608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345937256341.691, "dur": 1.926, + "args": { + "External id": 979018,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 1609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937256342.590, "dur": 0.832, + "args": { + "External id": 979019,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 1610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937256350.766, "dur": 5.696, + "args": { + "External id": 979020,"Record function id": 0, "Ev Idx": 1611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937256352.086, "dur": 3.710, + "args": { + "External id": 979021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937256353.258, "dur": 2.181, + "args": { + "External id": 979022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937256354.092, "dur": 1.173, + "args": { + "External id": 979023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937256361.072, "dur": 417.751, + "args": { + "External id": 979024,"Record function id": 0, "Sequence number": 10552295, "Fwd thread id": 1, "Ev Idx": 1615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937256362.376, "dur": 378.526, + "args": { + "External id": 979025,"Sequence number": 10552295, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1616 + } + }, + { + "ph": "f", "id": 171, "pid": 2338708, "tid": 2379421, "ts": 6345937256362.376, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937256405.527, "dur": 2.301, + "args": { + "External id": 979026,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937256406.096, "dur": 1.556, + "args": { + "External id": 979027,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 1618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937256423.635, "dur": 6.960, + "args": { + "External id": 979028,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937256440.510, "dur": 2.619, + "args": { + "External id": 979029,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937256616.842, "dur": 2.281, + "args": { + "External id": 979030,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 1621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937256623.643, "dur": 42.622, + "args": { + "External id": 979031,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 1622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937256638.096, "dur": 1.173, + "args": { + "External id": 979032,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 1623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937256672.286, "dur": 39.917, + "args": { + "External id": 979033,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 1624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937256677.043, "dur": 34.863, + "args": { + "External id": 979034,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 1625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937256681.415, "dur": 4.730, + "args": { + "External id": 979035,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937256688.188, "dur": 22.953, + "args": { + "External id": 979036,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 1627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345937256717.226, "dur": 2.910, + "args": { + "External id": 979037,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 1628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937256718.470, "dur": 1.475, + "args": { + "External id": 979038,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 1629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937256728.322, "dur": 4.612, + "args": { + "External id": 979039,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937256731.568, "dur": 1.237, + "args": { + "External id": 979040,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 1631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345937256754.621, "dur": 18.069, + "args": { + "External id": 979041,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 1632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937256788.643, "dur": 8.786, + "args": { + "External id": 979042,"Record function id": 0, "Ev Idx": 1633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937256790.973, "dur": 5.688, + "args": { + "External id": 979043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937256792.951, "dur": 2.652, + "args": { + "External id": 979044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937256793.894, "dur": 1.589, + "args": { + "External id": 979045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937256802.083, "dur": 3164.481, + "args": { + "External id": 979046,"Record function id": 0, "Ev Idx": 1637 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2338708, "tid": 2379421, + "ts": 6345937256840.661, "dur": 1091.394, + "args": { + "External id": 979047,"Record function id": 0, "Ev Idx": 1638 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2338708, "tid": 2379421, + "ts": 6345937256866.332, "dur": 1055.255, + "args": { + "External id": 979048,"Record function id": 0, "Ev Idx": 1639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338708, "tid": 2379421, + "ts": 6345937256883.056, "dur": 1020.870, + "args": { + "External id": 979049,"Record function id": 0, "Ev Idx": 1640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937256964.583, "dur": 5.457, + "args": { + "External id": 979050,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937256986.681, "dur": 61.391, + "args": { + "External id": 979051,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937256994.932, "dur": 1.317, + "args": { + "External id": 979052,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937256999.104, "dur": 2.480, + "args": { + "External id": 979053,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257002.545, "dur": 0.320, + "args": { + "External id": 979054,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257005.317, "dur": 1.849, + "args": { + "External id": 979055,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257031.061, "dur": 0.598, + "args": { + "External id": 979056,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257034.671, "dur": 0.286, + "args": { + "External id": 979057,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257037.421, "dur": 0.403, + "args": { + "External id": 979058,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257038.873, "dur": 0.472, + "args": { + "External id": 979059,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257041.897, "dur": 0.306, + "args": { + "External id": 979060,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937257101.013, "dur": 52.970, + "args": { + "External id": 979061,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345937257203.610, "dur": 143.012, + "args": { + "External id": 979062,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937257217.773, "dur": 7.268, + "args": { + "External id": 979063,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345937257230.938, "dur": 15.587, + "args": { + "External id": 979064,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937257235.710, "dur": 10.301, + "args": { + "External id": 979065,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257241.084, "dur": 3.007, + "args": { + "External id": 979066,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937257255.479, "dur": 31.606, + "args": { + "External id": 979067,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257257.598, "dur": 0.501, + "args": { + "External id": 979068,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257260.585, "dur": 0.555, + "args": { + "External id": 979069,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257261.972, "dur": 0.354, + "args": { + "External id": 979070,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257266.477, "dur": 0.706, + "args": { + "External id": 979071,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257268.338, "dur": 0.426, + "args": { + "External id": 979072,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257269.685, "dur": 2.120, + "args": { + "External id": 979073,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257273.722, "dur": 2.694, + "args": { + "External id": 979074,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257277.403, "dur": 0.641, + "args": { + "External id": 979075,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937257280.929, "dur": 0.483, + "args": { + "External id": 979076,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937257300.858, "dur": 35.327, + "args": { + "External id": 979077,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937257415.570, "dur": 377.605, + "args": { + "External id": 979078,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937257452.552, "dur": 335.210, + "args": { + "External id": 979079,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1670, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937257463.374, "dur": 317.770, + "args": { + "External id": 979080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937257817.491, "dur": 2.428, + "args": { + "External id": 979081,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1672, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937257941.875, "dur": 1998.420, + "args": { + "External id": 979082,"Sequence number": 10552294, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1673 + } + }, + { + "ph": "f", "id": 172, "pid": 2338708, "tid": 2379421, "ts": 6345937257941.875, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937258138.907, "dur": 130.854, + "args": { + "External id": 979083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937258321.854, "dur": 45.118, + "args": { + "External id": 979084,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345937258390.864, "dur": 54.627, + "args": { + "External id": 979085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937258457.415, "dur": 34.575, + "args": { + "External id": 979086,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937258499.064, "dur": 35.773, + "args": { + "External id": 979087,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937258543.927, "dur": 31.498, + "args": { + "External id": 979088,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937258584.533, "dur": 32.485, + "args": { + "External id": 979089,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937258648.429, "dur": 25.573, + "args": { + "External id": 979090,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937258699.271, "dur": 31.962, + "args": { + "External id": 979091,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937258757.648, "dur": 21.888, + "args": { + "External id": 979092,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937258798.808, "dur": 16.342, + "args": { + "External id": 979093,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937258825.503, "dur": 40.691, + "args": { + "External id": 979094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937258870.322, "dur": 37.319, + "args": { + "External id": 979095,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937258939.298, "dur": 357.798, + "args": { + "External id": 979096,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937259092.090, "dur": 11.480, + "args": { + "External id": 979097,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937259106.641, "dur": 4.536, + "args": { + "External id": 979098,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937259112.253, "dur": 1.837, + "args": { + "External id": 979099,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937259115.593, "dur": 1.680, + "args": { + "External id": 979100,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937259166.378, "dur": 7.872, + "args": { + "External id": 979101,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937259169.151, "dur": 4.761, + "args": { + "External id": 979102,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937259176.517, "dur": 39.128, + "args": { + "External id": 979103,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937259187.328, "dur": 2.157, + "args": { + "External id": 979104,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937259217.598, "dur": 5.213, + "args": { + "External id": 979105,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937259222.048, "dur": 0.663, + "args": { + "External id": 979106,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937259224.224, "dur": 16.223, + "args": { + "External id": 979107,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937259226.753, "dur": 0.626, + "args": { + "External id": 979108,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937259342.043, "dur": 31.610, + "args": { + "External id": 979109,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937259395.029, "dur": 17.076, + "args": { + "External id": 979110,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937259421.111, "dur": 52.808, + "args": { + "External id": 979111,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937259483.733, "dur": 43.993, + "args": { + "External id": 979112,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937259536.918, "dur": 24.225, + "args": { + "External id": 979113,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937259567.576, "dur": 33.983, + "args": { + "External id": 979114,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937259609.426, "dur": 30.430, + "args": { + "External id": 979115,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937259649.819, "dur": 34.217, + "args": { + "External id": 979116,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345937259710.787, "dur": 29.367, + "args": { + "External id": 979117,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 1708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937259762.188, "dur": 29.679, + "args": { + "External id": 979118,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937259810.184, "dur": 20.485, + "args": { + "External id": 979119,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937259852.189, "dur": 18.521, + "args": { + "External id": 979120,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345937259888.189, "dur": 18.722, + "args": { + "External id": 979121,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 1712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937259993.005, "dur": 43.365, + "args": { + "External id": 979122,"Record function id": 0, "Ev Idx": 1713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937259996.666, "dur": 37.843, + "args": { + "External id": 979123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260001.289, "dur": 31.070, + "args": { + "External id": 979124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260002.949, "dur": 28.842, + "args": { + "External id": 979125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260044.579, "dur": 7.501, + "args": { + "External id": 979126,"Record function id": 0, "Ev Idx": 1717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260046.706, "dur": 4.732, + "args": { + "External id": 979127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260048.328, "dur": 2.451, + "args": { + "External id": 979128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260049.206, "dur": 1.487, + "args": { + "External id": 979129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260105.029, "dur": 8.872, + "args": { + "External id": 979130,"Record function id": 0, "Ev Idx": 1721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260107.413, "dur": 5.594, + "args": { + "External id": 979131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260108.961, "dur": 2.879, + "args": { + "External id": 979132,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260109.786, "dur": 1.787, + "args": { + "External id": 979133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260118.075, "dur": 5.069, + "args": { + "External id": 979134,"Record function id": 0, "Ev Idx": 1725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260119.880, "dur": 2.789, + "args": { + "External id": 979135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260120.775, "dur": 1.342, + "args": { + "External id": 979136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260121.263, "dur": 0.756, + "args": { + "External id": 979137,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260127.009, "dur": 9.861, + "args": { + "External id": 979138,"Record function id": 0, "Ev Idx": 1729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260128.533, "dur": 7.807, + "args": { + "External id": 979139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260134.313, "dur": 1.452, + "args": { + "External id": 979140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260134.659, "dur": 1.027, + "args": { + "External id": 979141,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260140.558, "dur": 7.806, + "args": { + "External id": 979142,"Record function id": 0, "Ev Idx": 1733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260142.652, "dur": 5.255, + "args": { + "External id": 979143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260143.421, "dur": 3.879, + "args": { + "External id": 979144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260146.215, "dur": 0.962, + "args": { + "External id": 979145,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260154.106, "dur": 4.508, + "args": { + "External id": 979146,"Record function id": 0, "Ev Idx": 1737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260155.548, "dur": 2.592, + "args": { + "External id": 979147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260156.233, "dur": 1.293, + "args": { + "External id": 979148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260156.759, "dur": 0.659, + "args": { + "External id": 979149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260162.336, "dur": 7.143, + "args": { + "External id": 979150,"Record function id": 0, "Ev Idx": 1741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260163.903, "dur": 5.131, + "args": { + "External id": 979151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260164.553, "dur": 4.005, + "args": { + "External id": 979152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260164.993, "dur": 3.442, + "args": { + "External id": 979153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260173.216, "dur": 4.703, + "args": { + "External id": 979154,"Record function id": 0, "Ev Idx": 1745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937260174.583, "dur": 2.844, + "args": { + "External id": 979155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260175.294, "dur": 1.623, + "args": { + "External id": 979156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937260175.889, "dur": 0.945, + "args": { + "External id": 979157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937260182.974, "dur": 280166.801, + "args": { + "External id": 979158,"Record function id": 0, "Sequence number": 10552293, "Fwd thread id": 1, "Ev Idx": 1749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937260184.527, "dur": 280153.864, + "args": { + "External id": 979159,"Sequence number": 10552293, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1750 + } + }, + { + "ph": "f", "id": 173, "pid": 2338708, "tid": 2379421, "ts": 6345937260184.527, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2338708, "tid": 2379421, + "ts": 6345937260223.005, "dur": 48.150, + "args": { + "External id": 979160,"Record function id": 0, "Ev Idx": 1751 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2338708, "tid": 2379421, + "ts": 6345937260280.914, "dur": 84.056, + "args": { + "External id": 979161,"Record function id": 0, "Ev Idx": 1752 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2338708, "tid": 2379421, + "ts": 6345937260371.930, "dur": 279955.247, + "args": { + "External id": 979162,"Record function id": 0, "Ev Idx": 1753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937260435.231, "dur": 8.644, + "args": { + "External id": 979163,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937260454.434, "dur": 5.359, + "args": { + "External id": 979164,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937260480.995, "dur": 278882.547, + "args": { + "External id": 979165,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937260496.734, "dur": 278851.386, + "args": { + "External id": 979166,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937260625.775, "dur": 6.093, + "args": { + "External id": 979167,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937260654.344, "dur": 278637.543, + "args": { + "External id": 979168,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 1759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937260657.402, "dur": 278633.021, + "args": { + "External id": 979169,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 1760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937260662.329, "dur": 12.542, + "args": { + "External id": 979170,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937260677.319, "dur": 278606.430, + "args": { + "External id": 979171,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 1762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937539491.541, "dur": 20.205, + "args": { + "External id": 979172,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 1763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937539499.833, "dur": 11.328, + "args": { + "External id": 979173,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937539557.310, "dur": 335.247, + "args": { + "External id": 979174,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 1765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937539594.776, "dur": 292.492, + "args": { + "External id": 979175,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1766, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937539607.812, "dur": 272.422, + "args": { + "External id": 979176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 1767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937539916.480, "dur": 2.738, + "args": { + "External id": 979177,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1768, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937539987.389, "dur": 7.383, + "args": { + "External id": 979178,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937540118.984, "dur": 2.749, + "args": { + "External id": 979179,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937540145.151, "dur": 1.999, + "args": { + "External id": 979180,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937540163.013, "dur": 1.037, + "args": { + "External id": 979181,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937540180.477, "dur": 0.993, + "args": { + "External id": 979182,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937540198.127, "dur": 1.005, + "args": { + "External id": 979183,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937540213.202, "dur": 1.137, + "args": { + "External id": 979184,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937540230.117, "dur": 3.599, + "args": { + "External id": 979185,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937540246.339, "dur": 1.118, + "args": { + "External id": 979186,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937540369.506, "dur": 3357.289, + "args": { + "External id": 979187,"Record function id": 0, "Ev Idx": 1778 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2338708, "tid": 2379421, + "ts": 6345937540391.915, "dur": 1245.612, + "args": { + "External id": 979188,"Record function id": 0, "Ev Idx": 1779 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338708, "tid": 2379421, + "ts": 6345937540408.696, "dur": 383.623, + "args": { + "External id": 979189,"Record function id": 0, "Ev Idx": 1780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937540507.391, "dur": 4.573, + "args": { + "External id": 979190,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937540515.825, "dur": 1.493, + "args": { + "External id": 979191,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937540519.278, "dur": 1.334, + "args": { + "External id": 979192,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937540522.685, "dur": 1.660, + "args": { + "External id": 979193,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937540526.379, "dur": 0.932, + "args": { + "External id": 979194,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937540531.268, "dur": 1.321, + "args": { + "External id": 979195,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937540534.419, "dur": 2.983, + "args": { + "External id": 979196,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937540539.221, "dur": 3.214, + "args": { + "External id": 979197,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937540544.201, "dur": 0.966, + "args": { + "External id": 979198,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937540548.965, "dur": 0.805, + "args": { + "External id": 979199,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937540571.105, "dur": 184.313, + "args": { + "External id": 979200,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937540589.265, "dur": 160.896, + "args": { + "External id": 979201,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937540613.152, "dur": 16.336, + "args": { + "External id": 979202,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937540633.721, "dur": 80.952, + "args": { + "External id": 979203,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 1794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937540636.910, "dur": 77.342, + "args": { + "External id": 979204,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 1795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937540643.813, "dur": 8.083, + "args": { + "External id": 979205,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937540653.959, "dur": 59.623, + "args": { + "External id": 979206,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 1797 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2338708, "tid": 2379421, + "ts": 6345937540884.851, "dur": 744.287, + "args": { + "External id": 979207,"Record function id": 0, "Ev Idx": 1798 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338708, "tid": 2379421, + "ts": 6345937540904.111, "dur": 710.075, + "args": { + "External id": 979208,"Record function id": 0, "Ev Idx": 1799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937540964.765, "dur": 6.370, + "args": { + "External id": 979209,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937540988.753, "dur": 59.331, + "args": { + "External id": 979210,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937540994.589, "dur": 1.822, + "args": { + "External id": 979211,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937540999.108, "dur": 1.650, + "args": { + "External id": 979212,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541002.493, "dur": 2.213, + "args": { + "External id": 979213,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541006.162, "dur": 0.665, + "args": { + "External id": 979214,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541030.191, "dur": 0.710, + "args": { + "External id": 979215,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541033.273, "dur": 0.333, + "args": { + "External id": 979216,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541035.690, "dur": 0.485, + "args": { + "External id": 979217,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541039.892, "dur": 0.509, + "args": { + "External id": 979218,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541042.216, "dur": 0.342, + "args": { + "External id": 979219,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937541096.965, "dur": 53.916, + "args": { + "External id": 979220,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345937541192.264, "dur": 146.727, + "args": { + "External id": 979221,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937541205.210, "dur": 5.578, + "args": { + "External id": 979222,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345937541216.525, "dur": 20.499, + "args": { + "External id": 979223,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937541227.009, "dur": 9.518, + "args": { + "External id": 979224,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541231.672, "dur": 3.005, + "args": { + "External id": 979225,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937541246.177, "dur": 33.334, + "args": { + "External id": 979226,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541248.681, "dur": 0.457, + "args": { + "External id": 979227,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541252.863, "dur": 0.564, + "args": { + "External id": 979228,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541254.974, "dur": 0.348, + "args": { + "External id": 979229,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541256.918, "dur": 2.227, + "args": { + "External id": 979230,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541261.042, "dur": 0.410, + "args": { + "External id": 979231,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541263.184, "dur": 0.432, + "args": { + "External id": 979232,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541267.250, "dur": 0.314, + "args": { + "External id": 979233,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541268.987, "dur": 2.790, + "args": { + "External id": 979234,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937541273.506, "dur": 0.274, + "args": { + "External id": 979235,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937541294.251, "dur": 35.043, + "args": { + "External id": 979236,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937541394.361, "dur": 137.181, + "args": { + "External id": 979237,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937541429.403, "dur": 98.129, + "args": { + "External id": 979238,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1829, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937541440.675, "dur": 81.717, + "args": { + "External id": 979239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937541552.481, "dur": 2.169, + "args": { + "External id": 979240,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1831, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937541646.551, "dur": 2054.070, + "args": { + "External id": 979241,"Sequence number": 10552292, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1832 + } + }, + { + "ph": "f", "id": 174, "pid": 2338708, "tid": 2379421, "ts": 6345937541646.551, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937541771.401, "dur": 119.355, + "args": { + "External id": 979242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937541937.317, "dur": 46.222, + "args": { + "External id": 979243,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345937542005.009, "dur": 125.370, + "args": { + "External id": 979244,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937542148.624, "dur": 41.981, + "args": { + "External id": 979245,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937542201.013, "dur": 39.320, + "args": { + "External id": 979246,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937542249.821, "dur": 32.012, + "args": { + "External id": 979247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937542292.832, "dur": 33.517, + "args": { + "External id": 979248,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937542358.951, "dur": 32.852, + "args": { + "External id": 979249,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937542416.562, "dur": 32.143, + "args": { + "External id": 979250,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937542476.251, "dur": 24.140, + "args": { + "External id": 979251,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937542520.211, "dur": 21.371, + "args": { + "External id": 979252,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937542553.114, "dur": 43.097, + "args": { + "External id": 979253,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937542600.674, "dur": 37.668, + "args": { + "External id": 979254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937542671.986, "dur": 281.480, + "args": { + "External id": 979255,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937542764.469, "dur": 6.546, + "args": { + "External id": 979256,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937542773.405, "dur": 2.689, + "args": { + "External id": 979257,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937542777.392, "dur": 4.540, + "args": { + "External id": 979258,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937542783.092, "dur": 3.918, + "args": { + "External id": 979259,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937542835.157, "dur": 6.098, + "args": { + "External id": 979260,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937542837.632, "dur": 3.447, + "args": { + "External id": 979261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937542843.618, "dur": 36.668, + "args": { + "External id": 979262,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937542850.392, "dur": 3.345, + "args": { + "External id": 979263,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937542882.420, "dur": 2.186, + "args": { + "External id": 979264,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937542883.682, "dur": 0.803, + "args": { + "External id": 979265,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 1856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937542885.922, "dur": 18.747, + "args": { + "External id": 979266,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 1857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937542890.530, "dur": 0.615, + "args": { + "External id": 979267,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 1858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937543035.073, "dur": 80.289, + "args": { + "External id": 979268,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937543146.327, "dur": 18.289, + "args": { + "External id": 979269,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937543175.683, "dur": 59.793, + "args": { + "External id": 979270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937543243.793, "dur": 44.514, + "args": { + "External id": 979271,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937543300.243, "dur": 24.396, + "args": { + "External id": 979272,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 1863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937543331.082, "dur": 35.056, + "args": { + "External id": 979273,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 1864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937543374.218, "dur": 31.169, + "args": { + "External id": 979274,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 1865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937543412.962, "dur": 33.879, + "args": { + "External id": 979275,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 1866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345937543473.308, "dur": 30.333, + "args": { + "External id": 979276,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 1867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937543522.643, "dur": 28.791, + "args": { + "External id": 979277,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937543569.713, "dur": 22.405, + "args": { + "External id": 979278,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 1869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937543610.633, "dur": 18.577, + "args": { + "External id": 979279,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 1870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345937543645.815, "dur": 20.340, + "args": { + "External id": 979280,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 1871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543753.223, "dur": 17.539, + "args": { + "External id": 979281,"Record function id": 0, "Ev Idx": 1872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543757.387, "dur": 12.417, + "args": { + "External id": 979282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543762.627, "dur": 5.986, + "args": { + "External id": 979283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543764.182, "dur": 4.333, + "args": { + "External id": 979284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543775.595, "dur": 7.770, + "args": { + "External id": 979285,"Record function id": 0, "Ev Idx": 1876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543777.150, "dur": 5.620, + "args": { + "External id": 979286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543778.214, "dur": 3.981, + "args": { + "External id": 979287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543778.683, "dur": 3.434, + "args": { + "External id": 979288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543787.213, "dur": 5.639, + "args": { + "External id": 979289,"Record function id": 0, "Ev Idx": 1880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543789.009, "dur": 3.246, + "args": { + "External id": 979290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543789.737, "dur": 1.945, + "args": { + "External id": 979291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543790.712, "dur": 0.840, + "args": { + "External id": 979292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 1883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543796.682, "dur": 4.666, + "args": { + "External id": 979293,"Record function id": 0, "Ev Idx": 1884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543798.026, "dur": 2.852, + "args": { + "External id": 979294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543798.856, "dur": 1.524, + "args": { + "External id": 979295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543799.449, "dur": 0.824, + "args": { + "External id": 979296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 1887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543805.059, "dur": 4.536, + "args": { + "External id": 979297,"Record function id": 0, "Ev Idx": 1888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543806.257, "dur": 2.849, + "args": { + "External id": 979298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543807.262, "dur": 1.218, + "args": { + "External id": 979299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543807.684, "dur": 0.707, + "args": { + "External id": 979300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543813.554, "dur": 4.735, + "args": { + "External id": 979301,"Record function id": 0, "Ev Idx": 1892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543815.008, "dur": 2.814, + "args": { + "External id": 979302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543815.758, "dur": 1.585, + "args": { + "External id": 979303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543816.462, "dur": 0.795, + "args": { + "External id": 979304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 1895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543822.262, "dur": 13.080, + "args": { + "External id": 979305,"Record function id": 0, "Ev Idx": 1896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543823.456, "dur": 11.386, + "args": { + "External id": 979306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543824.213, "dur": 10.144, + "args": { + "External id": 979307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543833.192, "dur": 1.036, + "args": { + "External id": 979308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543839.062, "dur": 4.400, + "args": { + "External id": 979309,"Record function id": 0, "Ev Idx": 1900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543840.322, "dur": 2.670, + "args": { + "External id": 979310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543841.278, "dur": 1.241, + "args": { + "External id": 979311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543841.732, "dur": 0.697, + "args": { + "External id": 979312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 1903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543847.060, "dur": 6.673, + "args": { + "External id": 979313,"Record function id": 0, "Ev Idx": 1904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937543848.517, "dur": 4.714, + "args": { + "External id": 979314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543849.310, "dur": 3.392, + "args": { + "External id": 979315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937543849.767, "dur": 2.855, + "args": { + "External id": 979316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 1907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937543858.256, "dur": 81558.095, + "args": { + "External id": 979317,"Record function id": 0, "Sequence number": 10552291, "Fwd thread id": 1, "Ev Idx": 1908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937543859.638, "dur": 81547.777, + "args": { + "External id": 979318,"Sequence number": 10552291, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1909 + } + }, + { + "ph": "f", "id": 175, "pid": 2338708, "tid": 2379421, "ts": 6345937543859.638, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2338708, "tid": 2379421, + "ts": 6345937543896.855, "dur": 48.088, + "args": { + "External id": 979319,"Record function id": 0, "Ev Idx": 1910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2338708, "tid": 2379421, + "ts": 6345937543953.967, "dur": 135.389, + "args": { + "External id": 979320,"Record function id": 0, "Ev Idx": 1911 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2338708, "tid": 2379421, + "ts": 6345937544102.438, "dur": 81295.100, + "args": { + "External id": 979321,"Record function id": 0, "Ev Idx": 1912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937544214.429, "dur": 9.381, + "args": { + "External id": 979322,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937544236.452, "dur": 5.742, + "args": { + "External id": 979323,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937544260.235, "dur": 80142.201, + "args": { + "External id": 979324,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937544276.163, "dur": 80112.059, + "args": { + "External id": 979325,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 1916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937544396.134, "dur": 19.371, + "args": { + "External id": 979326,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937544441.582, "dur": 79896.886, + "args": { + "External id": 979327,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 1918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937544444.760, "dur": 79892.526, + "args": { + "External id": 979328,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 1919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937544450.081, "dur": 11.595, + "args": { + "External id": 979329,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937544463.841, "dur": 79867.834, + "args": { + "External id": 979330,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 1921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937624523.250, "dur": 12.924, + "args": { + "External id": 979331,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 1922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937624527.130, "dur": 8.604, + "args": { + "External id": 979332,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937624570.318, "dur": 409.596, + "args": { + "External id": 979333,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 1924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937624607.928, "dur": 366.555, + "args": { + "External id": 979334,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1925, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937624622.435, "dur": 343.325, + "args": { + "External id": 979335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 1926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937625005.758, "dur": 21.989, + "args": { + "External id": 979336,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1927, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937625137.664, "dur": 8.167, + "args": { + "External id": 979337,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937625200.312, "dur": 4.279, + "args": { + "External id": 979338,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937625225.489, "dur": 1.578, + "args": { + "External id": 979339,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937625242.099, "dur": 1.255, + "args": { + "External id": 979340,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937625261.093, "dur": 0.944, + "args": { + "External id": 979341,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937625276.226, "dur": 2.878, + "args": { + "External id": 979342,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937625292.859, "dur": 1.359, + "args": { + "External id": 979343,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937625308.246, "dur": 2.838, + "args": { + "External id": 979344,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937625324.075, "dur": 1.015, + "args": { + "External id": 979345,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937625435.145, "dur": 3331.710, + "args": { + "External id": 979346,"Record function id": 0, "Ev Idx": 1937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2338708, "tid": 2379421, + "ts": 6345937625457.402, "dur": 1247.136, + "args": { + "External id": 979347,"Record function id": 0, "Ev Idx": 1938 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338708, "tid": 2379421, + "ts": 6345937625474.175, "dur": 393.977, + "args": { + "External id": 979348,"Record function id": 0, "Ev Idx": 1939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937625571.221, "dur": 6.871, + "args": { + "External id": 979349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 1940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937625581.899, "dur": 1.146, + "args": { + "External id": 979350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937625585.273, "dur": 1.402, + "args": { + "External id": 979351,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937625588.529, "dur": 0.847, + "args": { + "External id": 979352,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937625593.401, "dur": 1.086, + "args": { + "External id": 979353,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937625596.483, "dur": 0.548, + "args": { + "External id": 979354,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 1945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937625598.698, "dur": 2.860, + "args": { + "External id": 979355,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 1946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937625603.035, "dur": 0.798, + "args": { + "External id": 979356,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937625607.900, "dur": 3.236, + "args": { + "External id": 979357,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937625612.885, "dur": 0.954, + "args": { + "External id": 979358,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 1949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937625649.367, "dur": 182.644, + "args": { + "External id": 979359,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937625667.962, "dur": 158.298, + "args": { + "External id": 979360,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 1951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937625688.587, "dur": 17.417, + "args": { + "External id": 979361,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937625710.380, "dur": 82.228, + "args": { + "External id": 979362,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 1953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937625716.379, "dur": 75.660, + "args": { + "External id": 979363,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 1954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937625721.435, "dur": 7.197, + "args": { + "External id": 979364,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937625730.679, "dur": 60.646, + "args": { + "External id": 979365,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 1956 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2338708, "tid": 2379421, + "ts": 6345937625964.046, "dur": 731.358, + "args": { + "External id": 979366,"Record function id": 0, "Ev Idx": 1957 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338708, "tid": 2379421, + "ts": 6345937625983.493, "dur": 696.997, + "args": { + "External id": 979367,"Record function id": 0, "Ev Idx": 1958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937626108.427, "dur": 8.126, + "args": { + "External id": 979368,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937626136.281, "dur": 43.964, + "args": { + "External id": 979369,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626142.363, "dur": 2.017, + "args": { + "External id": 979370,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626147.361, "dur": 1.877, + "args": { + "External id": 979371,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626151.279, "dur": 0.536, + "args": { + "External id": 979372,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626153.723, "dur": 2.552, + "args": { + "External id": 979373,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626164.140, "dur": 0.347, + "args": { + "External id": 979374,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626166.140, "dur": 0.731, + "args": { + "External id": 979375,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626168.291, "dur": 0.532, + "args": { + "External id": 979376,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626171.811, "dur": 0.445, + "args": { + "External id": 979377,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626173.607, "dur": 0.555, + "args": { + "External id": 979378,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937626192.391, "dur": 53.089, + "args": { + "External id": 979379,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345937626283.444, "dur": 127.476, + "args": { + "External id": 979380,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 1971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937626295.369, "dur": 4.603, + "args": { + "External id": 979381,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345937626305.814, "dur": 11.916, + "args": { + "External id": 979382,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937626310.681, "dur": 6.574, + "args": { + "External id": 979383,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 1974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626315.010, "dur": 0.771, + "args": { + "External id": 979384,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 1975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937626325.529, "dur": 33.151, + "args": { + "External id": 979385,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 1976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626328.441, "dur": 2.742, + "args": { + "External id": 979386,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626334.426, "dur": 0.282, + "args": { + "External id": 979387,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626336.134, "dur": 0.541, + "args": { + "External id": 979388,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626338.855, "dur": 1.346, + "args": { + "External id": 979389,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626341.691, "dur": 0.376, + "args": { + "External id": 979390,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626343.420, "dur": 0.387, + "args": { + "External id": 979391,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626346.944, "dur": 0.264, + "args": { + "External id": 979392,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626348.910, "dur": 0.289, + "args": { + "External id": 979393,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937626350.494, "dur": 2.428, + "args": { + "External id": 979394,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 1985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937626370.029, "dur": 31.145, + "args": { + "External id": 979395,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 1986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937626464.699, "dur": 135.516, + "args": { + "External id": 979396,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 1987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937626499.988, "dur": 96.187, + "args": { + "External id": 979397,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1988, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937626511.105, "dur": 79.826, + "args": { + "External id": 979398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 1989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937626621.183, "dur": 2.313, + "args": { + "External id": 979399,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1990, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937626712.824, "dur": 2029.071, + "args": { + "External id": 979400,"Sequence number": 10552290, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 1991 + } + }, + { + "ph": "f", "id": 176, "pid": 2338708, "tid": 2379421, "ts": 6345937626712.824, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937626835.964, "dur": 117.850, + "args": { + "External id": 979401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 1992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937627000.674, "dur": 107.825, + "args": { + "External id": 979402,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 1993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345937627135.187, "dur": 66.438, + "args": { + "External id": 979403,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 1994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937627213.993, "dur": 37.606, + "args": { + "External id": 979404,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937627258.410, "dur": 37.516, + "args": { + "External id": 979405,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937627304.760, "dur": 32.425, + "args": { + "External id": 979406,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 1997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937627347.315, "dur": 33.319, + "args": { + "External id": 979407,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 1998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937627413.304, "dur": 27.942, + "args": { + "External id": 979408,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 1999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937627464.971, "dur": 32.465, + "args": { + "External id": 979409,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937627526.560, "dur": 22.677, + "args": { + "External id": 979410,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937627563.645, "dur": 20.535, + "args": { + "External id": 979411,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937627594.725, "dur": 43.181, + "args": { + "External id": 979412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937627641.902, "dur": 36.446, + "args": { + "External id": 979413,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937627712.099, "dur": 324.802, + "args": { + "External id": 979414,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937627806.698, "dur": 7.427, + "args": { + "External id": 979415,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937627816.477, "dur": 3.023, + "args": { + "External id": 979416,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937627820.964, "dur": 1.563, + "args": { + "External id": 979417,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937627823.779, "dur": 3.613, + "args": { + "External id": 979418,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937627878.672, "dur": 5.598, + "args": { + "External id": 979419,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937627881.086, "dur": 2.982, + "args": { + "External id": 979420,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937627886.670, "dur": 37.725, + "args": { + "External id": 979421,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937627893.357, "dur": 3.095, + "args": { + "External id": 979422,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937627926.126, "dur": 1.670, + "args": { + "External id": 979423,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937627927.126, "dur": 0.571, + "args": { + "External id": 979424,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937627929.295, "dur": 25.503, + "args": { + "External id": 979425,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937627931.386, "dur": 0.679, + "args": { + "External id": 979426,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937628130.752, "dur": 42.240, + "args": { + "External id": 979427,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937628200.033, "dur": 19.978, + "args": { + "External id": 979428,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937628230.247, "dur": 57.012, + "args": { + "External id": 979429,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937628295.020, "dur": 44.279, + "args": { + "External id": 979430,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937628350.666, "dur": 24.634, + "args": { + "External id": 979431,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937628381.894, "dur": 36.227, + "args": { + "External id": 979432,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937628426.439, "dur": 33.127, + "args": { + "External id": 979433,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937628466.941, "dur": 34.378, + "args": { + "External id": 979434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345937628525.224, "dur": 28.661, + "args": { + "External id": 979435,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937628572.245, "dur": 26.360, + "args": { + "External id": 979436,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937628617.487, "dur": 19.655, + "args": { + "External id": 979437,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937628657.883, "dur": 15.389, + "args": { + "External id": 979438,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345937628691.102, "dur": 18.173, + "args": { + "External id": 979439,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628792.770, "dur": 17.963, + "args": { + "External id": 979440,"Record function id": 0, "Ev Idx": 2031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628797.186, "dur": 12.536, + "args": { + "External id": 979441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628802.415, "dur": 6.138, + "args": { + "External id": 979442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628804.145, "dur": 4.303, + "args": { + "External id": 979443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628815.515, "dur": 6.071, + "args": { + "External id": 979444,"Record function id": 0, "Ev Idx": 2035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628817.525, "dur": 3.552, + "args": { + "External id": 979445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628818.620, "dur": 1.946, + "args": { + "External id": 979446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628819.395, "dur": 1.066, + "args": { + "External id": 979447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628825.661, "dur": 7.935, + "args": { + "External id": 979448,"Record function id": 0, "Ev Idx": 2039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628827.286, "dur": 5.811, + "args": { + "External id": 979449,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628828.310, "dur": 4.297, + "args": { + "External id": 979450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628829.105, "dur": 3.367, + "args": { + "External id": 979451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628837.445, "dur": 5.646, + "args": { + "External id": 979452,"Record function id": 0, "Ev Idx": 2043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628839.411, "dur": 3.168, + "args": { + "External id": 979453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628840.254, "dur": 1.855, + "args": { + "External id": 979454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628841.110, "dur": 0.914, + "args": { + "External id": 979455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628846.862, "dur": 43.682, + "args": { + "External id": 979456,"Record function id": 0, "Ev Idx": 2047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628887.342, "dur": 2.709, + "args": { + "External id": 979457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628888.084, "dur": 1.339, + "args": { + "External id": 979458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628888.486, "dur": 0.851, + "args": { + "External id": 979459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628894.569, "dur": 11.983, + "args": { + "External id": 979460,"Record function id": 0, "Ev Idx": 2051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628900.989, "dur": 5.071, + "args": { + "External id": 979461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628901.558, "dur": 4.013, + "args": { + "External id": 979462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628904.466, "dur": 0.992, + "args": { + "External id": 979463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628911.377, "dur": 4.332, + "args": { + "External id": 979464,"Record function id": 0, "Ev Idx": 2055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628912.911, "dur": 2.301, + "args": { + "External id": 979465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628913.491, "dur": 1.128, + "args": { + "External id": 979466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628913.829, "dur": 0.675, + "args": { + "External id": 979467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628919.472, "dur": 4.678, + "args": { + "External id": 979468,"Record function id": 0, "Ev Idx": 2059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628921.044, "dur": 2.623, + "args": { + "External id": 979469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628921.756, "dur": 1.406, + "args": { + "External id": 979470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628922.273, "dur": 0.813, + "args": { + "External id": 979471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628927.822, "dur": 3.944, + "args": { + "External id": 979472,"Record function id": 0, "Ev Idx": 2063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937628929.063, "dur": 2.183, + "args": { + "External id": 979473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628929.655, "dur": 1.131, + "args": { + "External id": 979474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937628930.087, "dur": 0.620, + "args": { + "External id": 979475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937628936.075, "dur": 74207.885, + "args": { + "External id": 979476,"Record function id": 0, "Sequence number": 10552289, "Fwd thread id": 1, "Ev Idx": 2067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937628940.214, "dur": 74191.944, + "args": { + "External id": 979477,"Sequence number": 10552289, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2068 + } + }, + { + "ph": "f", "id": 177, "pid": 2338708, "tid": 2379421, "ts": 6345937628940.214, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2338708, "tid": 2379421, + "ts": 6345937628976.516, "dur": 66.338, + "args": { + "External id": 979478,"Record function id": 0, "Ev Idx": 2069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2338708, "tid": 2379421, + "ts": 6345937629097.909, "dur": 83.473, + "args": { + "External id": 979479,"Record function id": 0, "Ev Idx": 2070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2338708, "tid": 2379421, + "ts": 6345937629189.496, "dur": 73932.285, + "args": { + "External id": 979480,"Record function id": 0, "Ev Idx": 2071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937629297.244, "dur": 9.607, + "args": { + "External id": 979481,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937629319.395, "dur": 8.008, + "args": { + "External id": 979482,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937629348.158, "dur": 72796.024, + "args": { + "External id": 979483,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937629365.065, "dur": 72764.523, + "args": { + "External id": 979484,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937629471.272, "dur": 18.479, + "args": { + "External id": 979485,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937629512.952, "dur": 72565.258, + "args": { + "External id": 979486,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937629516.114, "dur": 72560.470, + "args": { + "External id": 979487,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937629521.184, "dur": 12.387, + "args": { + "External id": 979488,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937629536.024, "dur": 72510.589, + "args": { + "External id": 979489,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937702263.981, "dur": 13.554, + "args": { + "External id": 979490,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937702267.935, "dur": 9.130, + "args": { + "External id": 979491,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937702309.912, "dur": 403.323, + "args": { + "External id": 979492,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937702347.326, "dur": 360.429, + "args": { + "External id": 979493,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2084, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937702361.333, "dur": 339.635, + "args": { + "External id": 979494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937702740.144, "dur": 2.797, + "args": { + "External id": 979495,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2086, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937702808.996, "dur": 7.694, + "args": { + "External id": 979496,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937702870.920, "dur": 1.912, + "args": { + "External id": 979497,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937702893.115, "dur": 4.195, + "args": { + "External id": 979498,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937702911.906, "dur": 0.981, + "args": { + "External id": 979499,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937702928.189, "dur": 0.942, + "args": { + "External id": 979500,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937702942.811, "dur": 1.068, + "args": { + "External id": 979501,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937702957.921, "dur": 3.247, + "args": { + "External id": 979502,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937702974.575, "dur": 3.322, + "args": { + "External id": 979503,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937702992.475, "dur": 1.091, + "args": { + "External id": 979504,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937703162.999, "dur": 3263.457, + "args": { + "External id": 979505,"Record function id": 0, "Ev Idx": 2096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2338708, "tid": 2379421, + "ts": 6345937703184.436, "dur": 1200.170, + "args": { + "External id": 979506,"Record function id": 0, "Ev Idx": 2097 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338708, "tid": 2379421, + "ts": 6345937703200.016, "dur": 370.533, + "args": { + "External id": 979507,"Record function id": 0, "Ev Idx": 2098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937703296.816, "dur": 5.813, + "args": { + "External id": 979508,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937703307.467, "dur": 1.204, + "args": { + "External id": 979509,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937703310.736, "dur": 3.520, + "args": { + "External id": 979510,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937703316.547, "dur": 1.059, + "args": { + "External id": 979511,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937703319.230, "dur": 0.966, + "args": { + "External id": 979512,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937703321.767, "dur": 1.006, + "args": { + "External id": 979513,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937703324.576, "dur": 2.307, + "args": { + "External id": 979514,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937703330.260, "dur": 0.846, + "args": { + "External id": 979515,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937703332.859, "dur": 0.914, + "args": { + "External id": 979516,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937703335.522, "dur": 0.666, + "args": { + "External id": 979517,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937703356.943, "dur": 178.323, + "args": { + "External id": 979518,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937703375.396, "dur": 154.493, + "args": { + "External id": 979519,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937703402.857, "dur": 16.745, + "args": { + "External id": 979520,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937703423.864, "dur": 75.509, + "args": { + "External id": 979521,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937703428.657, "dur": 70.299, + "args": { + "External id": 979522,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703433.166, "dur": 7.053, + "args": { + "External id": 979523,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937703442.290, "dur": 56.053, + "args": { + "External id": 979524,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2115 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2338708, "tid": 2379421, + "ts": 6345937703660.621, "dur": 715.224, + "args": { + "External id": 979525,"Record function id": 0, "Ev Idx": 2116 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338708, "tid": 2379421, + "ts": 6345937703679.418, "dur": 681.749, + "args": { + "External id": 979526,"Record function id": 0, "Ev Idx": 2117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937703738.277, "dur": 5.285, + "args": { + "External id": 979527,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937703761.117, "dur": 35.960, + "args": { + "External id": 979528,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703766.704, "dur": 1.805, + "args": { + "External id": 979529,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703773.103, "dur": 0.428, + "args": { + "External id": 979530,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703775.352, "dur": 0.370, + "args": { + "External id": 979531,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703777.503, "dur": 0.403, + "args": { + "External id": 979532,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703780.611, "dur": 0.347, + "args": { + "External id": 979533,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703782.501, "dur": 2.616, + "args": { + "External id": 979534,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703786.742, "dur": 1.652, + "args": { + "External id": 979535,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703789.807, "dur": 0.568, + "args": { + "External id": 979536,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703791.939, "dur": 0.426, + "args": { + "External id": 979537,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937703808.357, "dur": 45.414, + "args": { + "External id": 979538,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345937703888.556, "dur": 151.806, + "args": { + "External id": 979539,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937703899.590, "dur": 3.816, + "args": { + "External id": 979540,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345937703909.352, "dur": 11.979, + "args": { + "External id": 979541,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937703914.224, "dur": 6.660, + "args": { + "External id": 979542,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703918.789, "dur": 0.625, + "args": { + "External id": 979543,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937703929.123, "dur": 31.954, + "args": { + "External id": 979544,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703931.517, "dur": 2.379, + "args": { + "External id": 979545,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703935.723, "dur": 0.433, + "args": { + "External id": 979546,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703938.216, "dur": 2.492, + "args": { + "External id": 979547,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703943.731, "dur": 0.441, + "args": { + "External id": 979548,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703945.671, "dur": 0.403, + "args": { + "External id": 979549,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703947.572, "dur": 0.515, + "args": { + "External id": 979550,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703950.578, "dur": 0.441, + "args": { + "External id": 979551,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703952.725, "dur": 0.413, + "args": { + "External id": 979552,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937703954.480, "dur": 1.774, + "args": { + "External id": 979553,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937703974.066, "dur": 54.905, + "args": { + "External id": 979554,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937704137.039, "dur": 141.838, + "args": { + "External id": 979555,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937704170.839, "dur": 104.095, + "args": { + "External id": 979556,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2147, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937704183.506, "dur": 85.774, + "args": { + "External id": 979557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937704298.791, "dur": 2.108, + "args": { + "External id": 979558,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2149, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937704393.176, "dur": 2009.114, + "args": { + "External id": 979559,"Sequence number": 10552288, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2150 + } + }, + { + "ph": "f", "id": 178, "pid": 2338708, "tid": 2379421, "ts": 6345937704393.176, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937704521.477, "dur": 117.937, + "args": { + "External id": 979560,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937704684.250, "dur": 41.503, + "args": { + "External id": 979561,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345937704743.464, "dur": 54.952, + "args": { + "External id": 979562,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937704824.104, "dur": 37.944, + "args": { + "External id": 979563,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937704870.111, "dur": 37.217, + "args": { + "External id": 979564,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937704914.703, "dur": 31.333, + "args": { + "External id": 979565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937704954.959, "dur": 33.792, + "args": { + "External id": 979566,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937705044.069, "dur": 68.114, + "args": { + "External id": 979567,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937705138.794, "dur": 30.868, + "args": { + "External id": 979568,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937705197.077, "dur": 25.527, + "args": { + "External id": 979569,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937705238.625, "dur": 18.490, + "args": { + "External id": 979570,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937705267.664, "dur": 49.629, + "args": { + "External id": 979571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937705321.655, "dur": 38.513, + "args": { + "External id": 979572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937705394.347, "dur": 279.735, + "args": { + "External id": 979573,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937705484.948, "dur": 6.901, + "args": { + "External id": 979574,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937705494.207, "dur": 3.896, + "args": { + "External id": 979575,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937705499.586, "dur": 3.632, + "args": { + "External id": 979576,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937705504.351, "dur": 1.825, + "args": { + "External id": 979577,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937705556.571, "dur": 8.350, + "args": { + "External id": 979578,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937705561.497, "dur": 3.160, + "args": { + "External id": 979579,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937705567.143, "dur": 36.140, + "args": { + "External id": 979580,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937705573.652, "dur": 3.754, + "args": { + "External id": 979581,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937705605.090, "dur": 1.714, + "args": { + "External id": 979582,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937705606.199, "dur": 0.504, + "args": { + "External id": 979583,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937705608.184, "dur": 16.453, + "args": { + "External id": 979584,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937705610.743, "dur": 0.696, + "args": { + "External id": 979585,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937705731.204, "dur": 37.841, + "args": { + "External id": 979586,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937705795.293, "dur": 18.038, + "args": { + "External id": 979587,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937705823.450, "dur": 44.339, + "args": { + "External id": 979588,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937705874.990, "dur": 42.000, + "args": { + "External id": 979589,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937705927.951, "dur": 27.135, + "args": { + "External id": 979590,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937705961.712, "dur": 36.274, + "args": { + "External id": 979591,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937706005.545, "dur": 90.850, + "args": { + "External id": 979592,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937706111.124, "dur": 39.013, + "args": { + "External id": 979593,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345937706175.858, "dur": 27.722, + "args": { + "External id": 979594,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937706225.217, "dur": 28.002, + "args": { + "External id": 979595,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937706272.083, "dur": 20.519, + "args": { + "External id": 979596,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937706314.223, "dur": 17.638, + "args": { + "External id": 979597,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345937706348.551, "dur": 19.065, + "args": { + "External id": 979598,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706452.090, "dur": 17.242, + "args": { + "External id": 979599,"Record function id": 0, "Ev Idx": 2190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706456.005, "dur": 12.216, + "args": { + "External id": 979600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706460.912, "dur": 6.155, + "args": { + "External id": 979601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706462.670, "dur": 4.296, + "args": { + "External id": 979602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706474.201, "dur": 5.537, + "args": { + "External id": 979603,"Record function id": 0, "Ev Idx": 2194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706475.891, "dur": 3.362, + "args": { + "External id": 979604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706476.860, "dur": 1.823, + "args": { + "External id": 979605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706477.512, "dur": 1.075, + "args": { + "External id": 979606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706483.626, "dur": 8.405, + "args": { + "External id": 979607,"Record function id": 0, "Ev Idx": 2198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706485.592, "dur": 5.927, + "args": { + "External id": 979608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706486.434, "dur": 4.515, + "args": { + "External id": 979609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706487.211, "dur": 3.596, + "args": { + "External id": 979610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706495.839, "dur": 5.121, + "args": { + "External id": 979611,"Record function id": 0, "Ev Idx": 2202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706497.445, "dur": 3.032, + "args": { + "External id": 979612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706498.168, "dur": 1.816, + "args": { + "External id": 979613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706498.971, "dur": 0.936, + "args": { + "External id": 979614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706504.670, "dur": 4.390, + "args": { + "External id": 979615,"Record function id": 0, "Ev Idx": 2206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706506.091, "dur": 2.454, + "args": { + "External id": 979616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706506.764, "dur": 1.258, + "args": { + "External id": 979617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706507.100, "dur": 0.840, + "args": { + "External id": 979618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706512.834, "dur": 4.887, + "args": { + "External id": 979619,"Record function id": 0, "Ev Idx": 2210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706514.414, "dur": 2.818, + "args": { + "External id": 979620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706515.133, "dur": 1.560, + "args": { + "External id": 979621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706515.869, "dur": 0.736, + "args": { + "External id": 979622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706521.507, "dur": 4.859, + "args": { + "External id": 979623,"Record function id": 0, "Ev Idx": 2214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706522.939, "dur": 2.947, + "args": { + "External id": 979624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706523.867, "dur": 1.540, + "args": { + "External id": 979625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706524.437, "dur": 0.883, + "args": { + "External id": 979626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706530.078, "dur": 7.295, + "args": { + "External id": 979627,"Record function id": 0, "Ev Idx": 2218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706531.492, "dur": 5.420, + "args": { + "External id": 979628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706532.366, "dur": 4.067, + "args": { + "External id": 979629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706535.497, "dur": 0.849, + "args": { + "External id": 979630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706543.012, "dur": 5.039, + "args": { + "External id": 979631,"Record function id": 0, "Ev Idx": 2222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937706544.543, "dur": 3.016, + "args": { + "External id": 979632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706545.378, "dur": 1.703, + "args": { + "External id": 979633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937706546.210, "dur": 0.789, + "args": { + "External id": 979634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937706552.896, "dur": 72871.485, + "args": { + "External id": 979635,"Record function id": 0, "Sequence number": 10552287, "Fwd thread id": 1, "Ev Idx": 2226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937706554.644, "dur": 72859.915, + "args": { + "External id": 979636,"Sequence number": 10552287, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2227 + } + }, + { + "ph": "f", "id": 179, "pid": 2338708, "tid": 2379421, "ts": 6345937706554.644, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2338708, "tid": 2379421, + "ts": 6345937706589.368, "dur": 46.919, + "args": { + "External id": 979637,"Record function id": 0, "Ev Idx": 2228 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2338708, "tid": 2379421, + "ts": 6345937706645.812, "dur": 72.588, + "args": { + "External id": 979638,"Record function id": 0, "Ev Idx": 2229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2338708, "tid": 2379421, + "ts": 6345937706725.257, "dur": 72679.684, + "args": { + "External id": 979639,"Record function id": 0, "Ev Idx": 2230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937706828.113, "dur": 7.744, + "args": { + "External id": 979640,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937706846.528, "dur": 8.262, + "args": { + "External id": 979641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937706875.610, "dur": 71496.446, + "args": { + "External id": 979642,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937706893.649, "dur": 71463.937, + "args": { + "External id": 979643,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937706999.175, "dur": 41.724, + "args": { + "External id": 979644,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937707107.135, "dur": 71198.811, + "args": { + "External id": 979645,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937707110.863, "dur": 71193.998, + "args": { + "External id": 979646,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937707116.692, "dur": 15.403, + "args": { + "External id": 979647,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937707135.097, "dur": 71164.591, + "args": { + "External id": 979648,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937778490.503, "dur": 13.335, + "args": { + "External id": 979649,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937778494.287, "dur": 8.939, + "args": { + "External id": 979650,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937778538.763, "dur": 445.578, + "args": { + "External id": 979651,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937778577.304, "dur": 400.907, + "args": { + "External id": 979652,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2243, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937778591.686, "dur": 379.753, + "args": { + "External id": 979653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937779030.886, "dur": 3.983, + "args": { + "External id": 979654,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2245, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937779147.760, "dur": 8.389, + "args": { + "External id": 979655,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937779211.551, "dur": 1.878, + "args": { + "External id": 979656,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937779232.927, "dur": 3.897, + "args": { + "External id": 979657,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937779251.003, "dur": 1.112, + "args": { + "External id": 979658,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937779266.249, "dur": 0.903, + "args": { + "External id": 979659,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937779281.151, "dur": 1.125, + "args": { + "External id": 979660,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937779295.533, "dur": 3.359, + "args": { + "External id": 979661,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937779312.354, "dur": 2.675, + "args": { + "External id": 979662,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937779330.651, "dur": 1.118, + "args": { + "External id": 979663,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937779443.715, "dur": 3314.384, + "args": { + "External id": 979664,"Record function id": 0, "Ev Idx": 2255 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2338708, "tid": 2379421, + "ts": 6345937779466.341, "dur": 1222.429, + "args": { + "External id": 979665,"Record function id": 0, "Ev Idx": 2256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338708, "tid": 2379421, + "ts": 6345937779483.142, "dur": 377.358, + "args": { + "External id": 979666,"Record function id": 0, "Ev Idx": 2257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937779581.136, "dur": 5.120, + "args": { + "External id": 979667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937779590.212, "dur": 0.948, + "args": { + "External id": 979668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937779593.332, "dur": 3.834, + "args": { + "External id": 979669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937779599.297, "dur": 0.929, + "args": { + "External id": 979670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937779601.807, "dur": 0.896, + "args": { + "External id": 979671,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937779604.604, "dur": 1.113, + "args": { + "External id": 979672,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937779607.479, "dur": 2.008, + "args": { + "External id": 979673,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937779612.878, "dur": 0.842, + "args": { + "External id": 979674,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937779615.423, "dur": 0.907, + "args": { + "External id": 979675,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937779617.999, "dur": 0.769, + "args": { + "External id": 979676,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937779640.137, "dur": 185.075, + "args": { + "External id": 979677,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937779658.266, "dur": 160.985, + "args": { + "External id": 979678,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937779684.461, "dur": 18.759, + "args": { + "External id": 979679,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937779707.492, "dur": 78.089, + "args": { + "External id": 979680,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937779710.423, "dur": 74.595, + "args": { + "External id": 979681,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937779716.683, "dur": 7.400, + "args": { + "External id": 979682,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937779726.051, "dur": 58.114, + "args": { + "External id": 979683,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2274 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2338708, "tid": 2379421, + "ts": 6345937779951.530, "dur": 727.316, + "args": { + "External id": 979684,"Record function id": 0, "Ev Idx": 2275 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338708, "tid": 2379421, + "ts": 6345937779971.646, "dur": 692.274, + "args": { + "External id": 979685,"Record function id": 0, "Ev Idx": 2276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937780090.409, "dur": 7.868, + "args": { + "External id": 979686,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937780118.851, "dur": 41.360, + "args": { + "External id": 979687,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780124.546, "dur": 2.296, + "args": { + "External id": 979688,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780129.075, "dur": 2.602, + "args": { + "External id": 979689,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780133.499, "dur": 0.464, + "args": { + "External id": 979690,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780135.905, "dur": 0.560, + "args": { + "External id": 979691,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780140.305, "dur": 0.433, + "args": { + "External id": 979692,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780142.744, "dur": 2.873, + "args": { + "External id": 979693,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780147.665, "dur": 0.551, + "args": { + "External id": 979694,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780151.333, "dur": 0.506, + "args": { + "External id": 979695,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780153.560, "dur": 0.603, + "args": { + "External id": 979696,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937780172.712, "dur": 53.453, + "args": { + "External id": 979697,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345937780264.558, "dur": 128.683, + "args": { + "External id": 979698,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937780277.452, "dur": 4.205, + "args": { + "External id": 979699,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345937780287.566, "dur": 11.809, + "args": { + "External id": 979700,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937780292.515, "dur": 6.382, + "args": { + "External id": 979701,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780296.861, "dur": 0.579, + "args": { + "External id": 979702,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937780306.998, "dur": 32.361, + "args": { + "External id": 979703,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780309.176, "dur": 0.467, + "args": { + "External id": 979704,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780313.268, "dur": 0.469, + "args": { + "External id": 979705,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780316.125, "dur": 3.107, + "args": { + "External id": 979706,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780320.843, "dur": 1.774, + "args": { + "External id": 979707,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780324.828, "dur": 0.496, + "args": { + "External id": 979708,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780326.672, "dur": 0.432, + "args": { + "External id": 979709,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780330.768, "dur": 0.390, + "args": { + "External id": 979710,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780332.605, "dur": 0.412, + "args": { + "External id": 979711,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937780334.552, "dur": 0.421, + "args": { + "External id": 979712,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937780351.264, "dur": 32.847, + "args": { + "External id": 979713,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937780446.201, "dur": 138.142, + "args": { + "External id": 979714,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937780479.786, "dur": 100.731, + "args": { + "External id": 979715,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2306, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937780491.634, "dur": 83.525, + "args": { + "External id": 979716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937780604.526, "dur": 2.219, + "args": { + "External id": 979717,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2308, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937780697.567, "dur": 2037.886, + "args": { + "External id": 979718,"Sequence number": 10552286, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2309 + } + }, + { + "ph": "f", "id": 180, "pid": 2338708, "tid": 2379421, "ts": 6345937780697.567, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937780822.292, "dur": 117.840, + "args": { + "External id": 979719,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937780988.140, "dur": 109.142, + "args": { + "External id": 979720,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345937781122.962, "dur": 65.476, + "args": { + "External id": 979721,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937781203.299, "dur": 36.880, + "args": { + "External id": 979722,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937781247.077, "dur": 36.686, + "args": { + "External id": 979723,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937781291.190, "dur": 31.876, + "args": { + "External id": 979724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937781331.756, "dur": 33.200, + "args": { + "External id": 979725,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937781399.183, "dur": 27.348, + "args": { + "External id": 979726,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937781446.927, "dur": 30.805, + "args": { + "External id": 979727,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937781505.556, "dur": 21.000, + "args": { + "External id": 979728,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937781541.598, "dur": 17.879, + "args": { + "External id": 979729,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937781567.117, "dur": 41.465, + "args": { + "External id": 979730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937781613.052, "dur": 36.201, + "args": { + "External id": 979731,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937781685.774, "dur": 315.018, + "args": { + "External id": 979732,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937781779.510, "dur": 6.398, + "args": { + "External id": 979733,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937781788.277, "dur": 2.637, + "args": { + "External id": 979734,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937781792.507, "dur": 2.810, + "args": { + "External id": 979735,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937781796.591, "dur": 1.745, + "args": { + "External id": 979736,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937781845.569, "dur": 7.984, + "args": { + "External id": 979737,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937781850.310, "dur": 3.074, + "args": { + "External id": 979738,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937781856.110, "dur": 40.305, + "args": { + "External id": 979739,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937781862.917, "dur": 5.973, + "args": { + "External id": 979740,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937781898.392, "dur": 12.335, + "args": { + "External id": 979741,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937781899.779, "dur": 9.307, + "args": { + "External id": 979742,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937781914.629, "dur": 28.160, + "args": { + "External id": 979743,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937781919.438, "dur": 0.832, + "args": { + "External id": 979744,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937782110.998, "dur": 36.565, + "args": { + "External id": 979745,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937782173.196, "dur": 19.786, + "args": { + "External id": 979746,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937782203.599, "dur": 59.921, + "args": { + "External id": 979747,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937782270.543, "dur": 46.630, + "args": { + "External id": 979748,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937782329.331, "dur": 26.859, + "args": { + "External id": 979749,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937782363.505, "dur": 37.540, + "args": { + "External id": 979750,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937782409.479, "dur": 31.776, + "args": { + "External id": 979751,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937782448.310, "dur": 33.822, + "args": { + "External id": 979752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345937782506.014, "dur": 28.685, + "args": { + "External id": 979753,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937782563.499, "dur": 29.220, + "args": { + "External id": 979754,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937782611.652, "dur": 18.728, + "args": { + "External id": 979755,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937782649.574, "dur": 15.949, + "args": { + "External id": 979756,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345937782683.886, "dur": 19.420, + "args": { + "External id": 979757,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782782.905, "dur": 18.918, + "args": { + "External id": 979758,"Record function id": 0, "Ev Idx": 2349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782786.843, "dur": 13.840, + "args": { + "External id": 979759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782791.779, "dur": 7.719, + "args": { + "External id": 979760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782793.539, "dur": 5.838, + "args": { + "External id": 979761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782806.657, "dur": 5.617, + "args": { + "External id": 979762,"Record function id": 0, "Ev Idx": 2353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782808.661, "dur": 3.025, + "args": { + "External id": 979763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782809.473, "dur": 1.673, + "args": { + "External id": 979764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782810.051, "dur": 0.963, + "args": { + "External id": 979765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782816.156, "dur": 8.342, + "args": { + "External id": 979766,"Record function id": 0, "Ev Idx": 2357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782817.738, "dur": 6.205, + "args": { + "External id": 979767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782818.938, "dur": 4.524, + "args": { + "External id": 979768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782819.888, "dur": 3.430, + "args": { + "External id": 979769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782828.781, "dur": 4.912, + "args": { + "External id": 979770,"Record function id": 0, "Ev Idx": 2361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782830.331, "dur": 2.850, + "args": { + "External id": 979771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782831.114, "dur": 1.615, + "args": { + "External id": 979772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782831.578, "dur": 1.053, + "args": { + "External id": 979773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782837.353, "dur": 4.545, + "args": { + "External id": 979774,"Record function id": 0, "Ev Idx": 2365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782838.793, "dur": 2.613, + "args": { + "External id": 979775,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782839.614, "dur": 1.351, + "args": { + "External id": 979776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782840.153, "dur": 0.727, + "args": { + "External id": 979777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782845.597, "dur": 4.603, + "args": { + "External id": 979778,"Record function id": 0, "Ev Idx": 2369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782847.101, "dur": 2.604, + "args": { + "External id": 979779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782847.665, "dur": 1.528, + "args": { + "External id": 979780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782848.258, "dur": 0.851, + "args": { + "External id": 979781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782854.033, "dur": 4.394, + "args": { + "External id": 979782,"Record function id": 0, "Ev Idx": 2373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782855.528, "dur": 2.418, + "args": { + "External id": 979783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782856.209, "dur": 1.254, + "args": { + "External id": 979784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782856.694, "dur": 0.694, + "args": { + "External id": 979785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782862.105, "dur": 6.433, + "args": { + "External id": 979786,"Record function id": 0, "Ev Idx": 2377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782863.578, "dur": 4.475, + "args": { + "External id": 979787,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782864.154, "dur": 3.413, + "args": { + "External id": 979788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782866.799, "dur": 0.648, + "args": { + "External id": 979789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782873.313, "dur": 4.747, + "args": { + "External id": 979790,"Record function id": 0, "Ev Idx": 2381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937782874.823, "dur": 2.734, + "args": { + "External id": 979791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782875.800, "dur": 1.300, + "args": { + "External id": 979792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937782876.300, "dur": 0.693, + "args": { + "External id": 979793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937782882.687, "dur": 70680.763, + "args": { + "External id": 979794,"Record function id": 0, "Sequence number": 10552285, "Fwd thread id": 1, "Ev Idx": 2385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937782884.469, "dur": 70669.030, + "args": { + "External id": 979795,"Sequence number": 10552285, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2386 + } + }, + { + "ph": "f", "id": 181, "pid": 2338708, "tid": 2379421, "ts": 6345937782884.469, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2338708, "tid": 2379421, + "ts": 6345937782919.294, "dur": 44.183, + "args": { + "External id": 979796,"Record function id": 0, "Ev Idx": 2387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2338708, "tid": 2379421, + "ts": 6345937782972.580, "dur": 138.984, + "args": { + "External id": 979797,"Record function id": 0, "Ev Idx": 2388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2338708, "tid": 2379421, + "ts": 6345937783121.809, "dur": 70422.162, + "args": { + "External id": 979798,"Record function id": 0, "Ev Idx": 2389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937783228.242, "dur": 8.839, + "args": { + "External id": 979799,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937783250.555, "dur": 8.619, + "args": { + "External id": 979800,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937783276.225, "dur": 69285.880, + "args": { + "External id": 979801,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937783294.895, "dur": 69252.825, + "args": { + "External id": 979802,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937783394.428, "dur": 19.055, + "args": { + "External id": 979803,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937783435.949, "dur": 69060.791, + "args": { + "External id": 979804,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937783441.337, "dur": 69054.269, + "args": { + "External id": 979805,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937783446.600, "dur": 11.782, + "args": { + "External id": 979806,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937783460.979, "dur": 69028.799, + "args": { + "External id": 979807,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937852681.349, "dur": 13.631, + "args": { + "External id": 979808,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937852685.190, "dur": 9.164, + "args": { + "External id": 979809,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937852729.112, "dur": 452.459, + "args": { + "External id": 979810,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937852765.181, "dur": 409.645, + "args": { + "External id": 979811,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2402, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937852779.006, "dur": 388.494, + "args": { + "External id": 979812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937853220.295, "dur": 3.015, + "args": { + "External id": 979813,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2404, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937853298.028, "dur": 8.187, + "args": { + "External id": 979814,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937853358.566, "dur": 1.879, + "args": { + "External id": 979815,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937853380.172, "dur": 4.163, + "args": { + "External id": 979816,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937853398.541, "dur": 0.815, + "args": { + "External id": 979817,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937853414.084, "dur": 0.885, + "args": { + "External id": 979818,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937853427.569, "dur": 0.789, + "args": { + "External id": 979819,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937853442.036, "dur": 3.245, + "args": { + "External id": 979820,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937853458.023, "dur": 2.499, + "args": { + "External id": 979821,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937853475.080, "dur": 0.980, + "args": { + "External id": 979822,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937853579.816, "dur": 3307.431, + "args": { + "External id": 979823,"Record function id": 0, "Ev Idx": 2414 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2338708, "tid": 2379421, + "ts": 6345937853601.410, "dur": 1211.856, + "args": { + "External id": 979824,"Record function id": 0, "Ev Idx": 2415 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338708, "tid": 2379421, + "ts": 6345937853618.421, "dur": 359.474, + "args": { + "External id": 979825,"Record function id": 0, "Ev Idx": 2416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937853712.104, "dur": 4.633, + "args": { + "External id": 979826,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937853720.577, "dur": 1.141, + "args": { + "External id": 979827,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937853724.027, "dur": 3.105, + "args": { + "External id": 979828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937853729.566, "dur": 0.969, + "args": { + "External id": 979829,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937853732.350, "dur": 0.907, + "args": { + "External id": 979830,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937853734.907, "dur": 1.173, + "args": { + "External id": 979831,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937853738.282, "dur": 2.720, + "args": { + "External id": 979832,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937853744.860, "dur": 1.042, + "args": { + "External id": 979833,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937853747.770, "dur": 1.000, + "args": { + "External id": 979834,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937853750.548, "dur": 0.675, + "args": { + "External id": 979835,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937853770.718, "dur": 174.423, + "args": { + "External id": 979836,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937853788.732, "dur": 151.112, + "args": { + "External id": 979837,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937853812.163, "dur": 18.647, + "args": { + "External id": 979838,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937853834.840, "dur": 73.401, + "args": { + "External id": 979839,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937853837.670, "dur": 70.201, + "args": { + "External id": 979840,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937853842.526, "dur": 7.036, + "args": { + "External id": 979841,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937853851.763, "dur": 55.482, + "args": { + "External id": 979842,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2433 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2338708, "tid": 2379421, + "ts": 6345937854135.646, "dur": 668.327, + "args": { + "External id": 979843,"Record function id": 0, "Ev Idx": 2434 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338708, "tid": 2379421, + "ts": 6345937854156.362, "dur": 632.863, + "args": { + "External id": 979844,"Record function id": 0, "Ev Idx": 2435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937854223.268, "dur": 7.444, + "args": { + "External id": 979845,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937854248.816, "dur": 40.428, + "args": { + "External id": 979846,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854254.835, "dur": 2.983, + "args": { + "External id": 979847,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854260.080, "dur": 0.522, + "args": { + "External id": 979848,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854262.411, "dur": 0.360, + "args": { + "External id": 979849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854266.223, "dur": 0.436, + "args": { + "External id": 979850,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854268.371, "dur": 0.521, + "args": { + "External id": 979851,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854270.751, "dur": 2.640, + "args": { + "External id": 979852,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854276.732, "dur": 0.569, + "args": { + "External id": 979853,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854279.031, "dur": 0.544, + "args": { + "External id": 979854,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854281.376, "dur": 1.506, + "args": { + "External id": 979855,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937854300.830, "dur": 52.869, + "args": { + "External id": 979856,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345937854390.079, "dur": 130.737, + "args": { + "External id": 979857,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937854401.976, "dur": 4.025, + "args": { + "External id": 979858,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345937854411.759, "dur": 12.472, + "args": { + "External id": 979859,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937854416.960, "dur": 6.791, + "args": { + "External id": 979860,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854421.620, "dur": 0.592, + "args": { + "External id": 979861,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937854432.157, "dur": 30.525, + "args": { + "External id": 979862,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854434.429, "dur": 0.467, + "args": { + "External id": 979863,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854436.859, "dur": 0.487, + "args": { + "External id": 979864,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854439.060, "dur": 3.076, + "args": { + "External id": 979865,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854444.206, "dur": 0.386, + "args": { + "External id": 979866,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854446.089, "dur": 0.369, + "args": { + "External id": 979867,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854449.733, "dur": 0.342, + "args": { + "External id": 979868,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854451.800, "dur": 0.352, + "args": { + "External id": 979869,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854453.834, "dur": 0.439, + "args": { + "External id": 979870,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937854456.811, "dur": 0.322, + "args": { + "External id": 979871,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937854476.235, "dur": 35.506, + "args": { + "External id": 979872,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937854573.882, "dur": 135.084, + "args": { + "External id": 979873,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937854606.692, "dur": 98.093, + "args": { + "External id": 979874,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2465, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937854618.880, "dur": 80.842, + "args": { + "External id": 979875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937854729.022, "dur": 2.144, + "args": { + "External id": 979876,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2467, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937854821.847, "dur": 2040.135, + "args": { + "External id": 979877,"Sequence number": 10552284, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2468 + } + }, + { + "ph": "f", "id": 182, "pid": 2338708, "tid": 2379421, "ts": 6345937854821.847, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937854947.027, "dur": 182.011, + "args": { + "External id": 979878,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937855183.881, "dur": 44.365, + "args": { + "External id": 979879,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345937855249.907, "dur": 63.437, + "args": { + "External id": 979880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937855328.268, "dur": 36.032, + "args": { + "External id": 979881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937855371.802, "dur": 36.254, + "args": { + "External id": 979882,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937855415.632, "dur": 31.344, + "args": { + "External id": 979883,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937855455.667, "dur": 32.384, + "args": { + "External id": 979884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937855518.359, "dur": 24.484, + "args": { + "External id": 979885,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937855565.977, "dur": 31.745, + "args": { + "External id": 979886,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937855623.985, "dur": 21.725, + "args": { + "External id": 979887,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937855661.984, "dur": 17.690, + "args": { + "External id": 979888,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937855687.767, "dur": 42.907, + "args": { + "External id": 979889,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937855734.654, "dur": 36.873, + "args": { + "External id": 979890,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937855808.631, "dur": 368.928, + "args": { + "External id": 979891,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937855898.287, "dur": 6.683, + "args": { + "External id": 979892,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937855907.207, "dur": 3.010, + "args": { + "External id": 979893,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937855912.009, "dur": 2.466, + "args": { + "External id": 979894,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937855915.960, "dur": 2.569, + "args": { + "External id": 979895,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937855968.669, "dur": 9.292, + "args": { + "External id": 979896,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937855974.642, "dur": 3.149, + "args": { + "External id": 979897,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937855980.544, "dur": 60.314, + "args": { + "External id": 979898,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937855987.617, "dur": 3.827, + "args": { + "External id": 979899,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937856044.621, "dur": 2.891, + "args": { + "External id": 979900,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937856046.334, "dur": 1.045, + "args": { + "External id": 979901,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937856048.821, "dur": 59.961, + "args": { + "External id": 979902,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937856051.591, "dur": 36.693, + "args": { + "External id": 979903,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937856232.187, "dur": 38.090, + "args": { + "External id": 979904,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937856293.129, "dur": 19.314, + "args": { + "External id": 979905,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937856322.083, "dur": 62.098, + "args": { + "External id": 979906,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937856391.824, "dur": 46.058, + "args": { + "External id": 979907,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937856449.840, "dur": 26.838, + "args": { + "External id": 979908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937856483.432, "dur": 35.345, + "args": { + "External id": 979909,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937856526.172, "dur": 33.122, + "args": { + "External id": 979910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937856566.907, "dur": 33.716, + "args": { + "External id": 979911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345937856624.977, "dur": 27.554, + "args": { + "External id": 979912,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937856687.477, "dur": 27.942, + "args": { + "External id": 979913,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937856734.760, "dur": 20.634, + "args": { + "External id": 979914,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937856773.642, "dur": 19.488, + "args": { + "External id": 979915,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345937856809.951, "dur": 18.135, + "args": { + "External id": 979916,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856912.357, "dur": 17.705, + "args": { + "External id": 979917,"Record function id": 0, "Ev Idx": 2508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856916.627, "dur": 12.385, + "args": { + "External id": 979918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856921.562, "dur": 6.398, + "args": { + "External id": 979919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856923.353, "dur": 4.494, + "args": { + "External id": 979920,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856934.714, "dur": 5.681, + "args": { + "External id": 979921,"Record function id": 0, "Ev Idx": 2512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856936.695, "dur": 3.197, + "args": { + "External id": 979922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856937.513, "dur": 1.833, + "args": { + "External id": 979923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856938.109, "dur": 1.151, + "args": { + "External id": 979924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856944.645, "dur": 7.412, + "args": { + "External id": 979925,"Record function id": 0, "Ev Idx": 2516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856946.665, "dur": 4.799, + "args": { + "External id": 979926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856947.377, "dur": 3.580, + "args": { + "External id": 979927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856948.106, "dur": 2.721, + "args": { + "External id": 979928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856955.938, "dur": 5.167, + "args": { + "External id": 979929,"Record function id": 0, "Ev Idx": 2520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856957.825, "dur": 2.762, + "args": { + "External id": 979930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856958.569, "dur": 1.524, + "args": { + "External id": 979931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856959.026, "dur": 0.961, + "args": { + "External id": 979932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856965.200, "dur": 4.415, + "args": { + "External id": 979933,"Record function id": 0, "Ev Idx": 2524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856966.737, "dur": 2.369, + "args": { + "External id": 979934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856967.406, "dur": 1.198, + "args": { + "External id": 979935,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856967.713, "dur": 0.814, + "args": { + "External id": 979936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856973.697, "dur": 4.498, + "args": { + "External id": 979937,"Record function id": 0, "Ev Idx": 2528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856975.117, "dur": 2.593, + "args": { + "External id": 979938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856975.674, "dur": 1.549, + "args": { + "External id": 979939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856976.360, "dur": 0.774, + "args": { + "External id": 979940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856982.073, "dur": 5.161, + "args": { + "External id": 979941,"Record function id": 0, "Ev Idx": 2532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856983.837, "dur": 2.936, + "args": { + "External id": 979942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856984.778, "dur": 1.504, + "args": { + "External id": 979943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856985.233, "dur": 0.959, + "args": { + "External id": 979944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856991.207, "dur": 6.467, + "args": { + "External id": 979945,"Record function id": 0, "Ev Idx": 2536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937856992.754, "dur": 4.485, + "args": { + "External id": 979946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856993.475, "dur": 3.275, + "args": { + "External id": 979947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937856995.868, "dur": 0.793, + "args": { + "External id": 979948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937857001.597, "dur": 4.950, + "args": { + "External id": 979949,"Record function id": 0, "Ev Idx": 2540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937857003.060, "dur": 2.995, + "args": { + "External id": 979950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937857003.974, "dur": 1.582, + "args": { + "External id": 979951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937857004.453, "dur": 1.013, + "args": { + "External id": 979952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937857032.361, "dur": 67633.560, + "args": { + "External id": 979953,"Record function id": 0, "Sequence number": 10552283, "Fwd thread id": 1, "Ev Idx": 2544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937857034.527, "dur": 67621.037, + "args": { + "External id": 979954,"Sequence number": 10552283, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2545 + } + }, + { + "ph": "f", "id": 183, "pid": 2338708, "tid": 2379421, "ts": 6345937857034.527, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2338708, "tid": 2379421, + "ts": 6345937857110.178, "dur": 44.529, + "args": { + "External id": 979955,"Record function id": 0, "Ev Idx": 2546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2338708, "tid": 2379421, + "ts": 6345937857164.160, "dur": 76.673, + "args": { + "External id": 979956,"Record function id": 0, "Ev Idx": 2547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2338708, "tid": 2379421, + "ts": 6345937857248.527, "dur": 67397.953, + "args": { + "External id": 979957,"Record function id": 0, "Ev Idx": 2548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937857350.420, "dur": 8.860, + "args": { + "External id": 979958,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937857371.782, "dur": 7.417, + "args": { + "External id": 979959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937857396.355, "dur": 66131.043, + "args": { + "External id": 979960,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937857413.627, "dur": 66099.451, + "args": { + "External id": 979961,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937857512.928, "dur": 20.203, + "args": { + "External id": 979962,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937857558.261, "dur": 65899.666, + "args": { + "External id": 979963,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937857561.358, "dur": 65895.334, + "args": { + "External id": 979964,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937857567.382, "dur": 9.438, + "args": { + "External id": 979965,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937857581.395, "dur": 65868.955, + "args": { + "External id": 979966,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937923653.371, "dur": 14.520, + "args": { + "External id": 979967,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937923657.674, "dur": 9.765, + "args": { + "External id": 979968,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937923736.753, "dur": 540.031, + "args": { + "External id": 979969,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937923776.124, "dur": 493.726, + "args": { + "External id": 979970,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2561, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937923789.737, "dur": 470.819, + "args": { + "External id": 979971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937924306.963, "dur": 3.300, + "args": { + "External id": 979972,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2563, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937924391.682, "dur": 8.561, + "args": { + "External id": 979973,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937924453.093, "dur": 1.572, + "args": { + "External id": 979974,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937924473.605, "dur": 4.837, + "args": { + "External id": 979975,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937924493.859, "dur": 1.544, + "args": { + "External id": 979976,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937924511.654, "dur": 0.981, + "args": { + "External id": 979977,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937924525.634, "dur": 0.880, + "args": { + "External id": 979978,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937924539.745, "dur": 3.097, + "args": { + "External id": 979979,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937924556.381, "dur": 2.533, + "args": { + "External id": 979980,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937924571.714, "dur": 2.508, + "args": { + "External id": 979981,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937924682.743, "dur": 3316.608, + "args": { + "External id": 979982,"Record function id": 0, "Ev Idx": 2573 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2338708, "tid": 2379421, + "ts": 6345937924707.494, "dur": 1205.986, + "args": { + "External id": 979983,"Record function id": 0, "Ev Idx": 2574 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338708, "tid": 2379421, + "ts": 6345937924725.185, "dur": 439.819, + "args": { + "External id": 979984,"Record function id": 0, "Ev Idx": 2575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937924813.574, "dur": 4.556, + "args": { + "External id": 979985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937924822.208, "dur": 0.651, + "args": { + "External id": 979986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937924824.935, "dur": 3.074, + "args": { + "External id": 979987,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937924830.090, "dur": 1.077, + "args": { + "External id": 979988,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937924832.934, "dur": 1.016, + "args": { + "External id": 979989,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937924835.586, "dur": 0.970, + "args": { + "External id": 979990,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937924838.518, "dur": 1.900, + "args": { + "External id": 979991,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937924844.581, "dur": 0.797, + "args": { + "External id": 979992,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937924847.216, "dur": 0.747, + "args": { + "External id": 979993,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937924849.868, "dur": 0.920, + "args": { + "External id": 979994,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937924871.190, "dur": 250.636, + "args": { + "External id": 979995,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937924889.679, "dur": 224.646, + "args": { + "External id": 979996,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937924909.138, "dur": 17.101, + "args": { + "External id": 979997,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937924932.499, "dur": 104.976, + "args": { + "External id": 979998,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937924935.789, "dur": 101.081, + "args": { + "External id": 979999,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937924940.405, "dur": 6.825, + "args": { + "External id": 980000,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937924949.216, "dur": 86.272, + "args": { + "External id": 980001,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2338708, "tid": 2379421, + "ts": 6345937925261.275, "dur": 642.378, + "args": { + "External id": 980002,"Record function id": 0, "Ev Idx": 2593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338708, "tid": 2379421, + "ts": 6345937925281.649, "dur": 607.153, + "args": { + "External id": 980003,"Record function id": 0, "Ev Idx": 2594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937925345.986, "dur": 7.173, + "args": { + "External id": 980004,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937925371.403, "dur": 38.310, + "args": { + "External id": 980005,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925377.456, "dur": 1.914, + "args": { + "External id": 980006,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925381.869, "dur": 1.221, + "args": { + "External id": 980007,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925384.725, "dur": 0.653, + "args": { + "External id": 980008,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925387.182, "dur": 0.394, + "args": { + "External id": 980009,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925389.802, "dur": 0.479, + "args": { + "External id": 980010,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925392.529, "dur": 2.975, + "args": { + "External id": 980011,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925397.769, "dur": 0.469, + "args": { + "External id": 980012,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925400.382, "dur": 0.622, + "args": { + "External id": 980013,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925403.321, "dur": 0.615, + "args": { + "External id": 980014,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937925421.392, "dur": 49.633, + "args": { + "External id": 980015,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345937925508.358, "dur": 127.067, + "args": { + "External id": 980016,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937925520.859, "dur": 3.651, + "args": { + "External id": 980017,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345937925530.050, "dur": 12.534, + "args": { + "External id": 980018,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937925535.487, "dur": 6.596, + "args": { + "External id": 980019,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925539.763, "dur": 0.739, + "args": { + "External id": 980020,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937925550.241, "dur": 31.263, + "args": { + "External id": 980021,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925553.304, "dur": 0.446, + "args": { + "External id": 980022,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925555.809, "dur": 0.630, + "args": { + "External id": 980023,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925558.710, "dur": 3.000, + "args": { + "External id": 980024,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925564.487, "dur": 0.483, + "args": { + "External id": 980025,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925567.205, "dur": 0.359, + "args": { + "External id": 980026,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925569.757, "dur": 0.424, + "args": { + "External id": 980027,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925571.863, "dur": 0.477, + "args": { + "External id": 980028,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925574.427, "dur": 0.305, + "args": { + "External id": 980029,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937925576.828, "dur": 0.497, + "args": { + "External id": 980030,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937925593.475, "dur": 32.915, + "args": { + "External id": 980031,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937925684.021, "dur": 126.406, + "args": { + "External id": 980032,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937925709.202, "dur": 97.277, + "args": { + "External id": 980033,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2624, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937925720.496, "dur": 81.461, + "args": { + "External id": 980034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937925829.023, "dur": 2.289, + "args": { + "External id": 980035,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2626, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937925921.915, "dur": 2051.636, + "args": { + "External id": 980036,"Sequence number": 10552282, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2627 + } + }, + { + "ph": "f", "id": 184, "pid": 2338708, "tid": 2379421, "ts": 6345937925921.915, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937926110.738, "dur": 123.853, + "args": { + "External id": 980037,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937926282.752, "dur": 46.067, + "args": { + "External id": 980038,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345937926348.448, "dur": 55.863, + "args": { + "External id": 980039,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937926417.403, "dur": 35.883, + "args": { + "External id": 980040,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937926461.228, "dur": 38.347, + "args": { + "External id": 980041,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937926508.304, "dur": 35.636, + "args": { + "External id": 980042,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937926553.245, "dur": 36.154, + "args": { + "External id": 980043,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937926619.683, "dur": 26.352, + "args": { + "External id": 980044,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937926668.369, "dur": 30.973, + "args": { + "External id": 980045,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937926722.395, "dur": 22.316, + "args": { + "External id": 980046,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937926761.017, "dur": 17.871, + "args": { + "External id": 980047,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937926789.210, "dur": 42.712, + "args": { + "External id": 980048,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937926836.515, "dur": 38.295, + "args": { + "External id": 980049,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937926909.888, "dur": 388.563, + "args": { + "External id": 980050,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937927001.268, "dur": 24.527, + "args": { + "External id": 980051,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937927029.785, "dur": 3.843, + "args": { + "External id": 980052,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937927035.278, "dur": 2.408, + "args": { + "External id": 980053,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937927039.288, "dur": 2.193, + "args": { + "External id": 980054,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937927132.219, "dur": 6.878, + "args": { + "External id": 980055,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937927134.945, "dur": 3.821, + "args": { + "External id": 980056,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937927141.918, "dur": 56.852, + "args": { + "External id": 980057,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937927149.625, "dur": 3.982, + "args": { + "External id": 980058,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937927204.932, "dur": 2.639, + "args": { + "External id": 980059,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937927206.388, "dur": 1.107, + "args": { + "External id": 980060,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937927209.360, "dur": 23.995, + "args": { + "External id": 980061,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937927213.065, "dur": 0.721, + "args": { + "External id": 980062,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937927346.317, "dur": 38.039, + "args": { + "External id": 980063,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937927404.796, "dur": 19.603, + "args": { + "External id": 980064,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937927435.311, "dur": 61.557, + "args": { + "External id": 980065,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937927509.201, "dur": 47.840, + "args": { + "External id": 980066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937927571.305, "dur": 27.490, + "args": { + "External id": 980067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937927606.463, "dur": 37.953, + "args": { + "External id": 980068,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937927653.746, "dur": 35.466, + "args": { + "External id": 980069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937927698.233, "dur": 36.264, + "args": { + "External id": 980070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345937927756.569, "dur": 28.502, + "args": { + "External id": 980071,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937927803.270, "dur": 28.147, + "args": { + "External id": 980072,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937927847.460, "dur": 21.221, + "args": { + "External id": 980073,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937927887.041, "dur": 18.871, + "args": { + "External id": 980074,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345937927919.987, "dur": 19.182, + "args": { + "External id": 980075,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928045.783, "dur": 56.799, + "args": { + "External id": 980076,"Record function id": 0, "Ev Idx": 2667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928050.471, "dur": 50.114, + "args": { + "External id": 980077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928090.800, "dur": 8.005, + "args": { + "External id": 980078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928093.111, "dur": 5.334, + "args": { + "External id": 980079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928110.482, "dur": 6.843, + "args": { + "External id": 980080,"Record function id": 0, "Ev Idx": 2671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928112.991, "dur": 3.858, + "args": { + "External id": 980081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928113.917, "dur": 2.394, + "args": { + "External id": 980082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928115.097, "dur": 1.124, + "args": { + "External id": 980083,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928121.430, "dur": 8.684, + "args": { + "External id": 980084,"Record function id": 0, "Ev Idx": 2675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928123.596, "dur": 6.022, + "args": { + "External id": 980085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928124.518, "dur": 4.605, + "args": { + "External id": 980086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928125.190, "dur": 3.861, + "args": { + "External id": 980087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928134.020, "dur": 5.321, + "args": { + "External id": 980088,"Record function id": 0, "Ev Idx": 2679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928135.704, "dur": 3.160, + "args": { + "External id": 980089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928136.354, "dur": 2.012, + "args": { + "External id": 980090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928137.196, "dur": 1.093, + "args": { + "External id": 980091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928143.322, "dur": 5.929, + "args": { + "External id": 980092,"Record function id": 0, "Ev Idx": 2683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928145.077, "dur": 3.678, + "args": { + "External id": 980093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928146.151, "dur": 2.085, + "args": { + "External id": 980094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928147.276, "dur": 0.873, + "args": { + "External id": 980095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928153.521, "dur": 5.208, + "args": { + "External id": 980096,"Record function id": 0, "Ev Idx": 2687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928155.301, "dur": 2.910, + "args": { + "External id": 980097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928156.225, "dur": 1.495, + "args": { + "External id": 980098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928156.853, "dur": 0.744, + "args": { + "External id": 980099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928162.725, "dur": 4.404, + "args": { + "External id": 980100,"Record function id": 0, "Ev Idx": 2691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928164.206, "dur": 2.393, + "args": { + "External id": 980101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928164.860, "dur": 1.257, + "args": { + "External id": 980102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928165.206, "dur": 0.796, + "args": { + "External id": 980103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928171.126, "dur": 3.992, + "args": { + "External id": 980104,"Record function id": 0, "Ev Idx": 2695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928172.436, "dur": 2.189, + "args": { + "External id": 980105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928173.031, "dur": 1.083, + "args": { + "External id": 980106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928173.376, "dur": 0.617, + "args": { + "External id": 980107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928178.851, "dur": 4.814, + "args": { + "External id": 980108,"Record function id": 0, "Ev Idx": 2699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937928180.165, "dur": 3.008, + "args": { + "External id": 980109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928180.756, "dur": 1.911, + "args": { + "External id": 980110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937928181.678, "dur": 0.843, + "args": { + "External id": 980111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937928188.562, "dur": 61015.473, + "args": { + "External id": 980112,"Record function id": 0, "Sequence number": 10552281, "Fwd thread id": 1, "Ev Idx": 2703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937928190.529, "dur": 61002.924, + "args": { + "External id": 980113,"Sequence number": 10552281, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2704 + } + }, + { + "ph": "f", "id": 185, "pid": 2338708, "tid": 2379421, "ts": 6345937928190.529, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2338708, "tid": 2379421, + "ts": 6345937928227.658, "dur": 48.764, + "args": { + "External id": 980114,"Record function id": 0, "Ev Idx": 2705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2338708, "tid": 2379421, + "ts": 6345937928285.962, "dur": 75.589, + "args": { + "External id": 980115,"Record function id": 0, "Ev Idx": 2706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2338708, "tid": 2379421, + "ts": 6345937928369.608, "dur": 60813.106, + "args": { + "External id": 980116,"Record function id": 0, "Ev Idx": 2707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937928476.337, "dur": 8.683, + "args": { + "External id": 980117,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937928497.218, "dur": 7.088, + "args": { + "External id": 980118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937928521.839, "dur": 59663.473, + "args": { + "External id": 980119,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937928537.720, "dur": 59632.841, + "args": { + "External id": 980120,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937928669.166, "dur": 20.475, + "args": { + "External id": 980121,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937928713.142, "dur": 59403.478, + "args": { + "External id": 980122,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937928716.727, "dur": 59398.750, + "args": { + "External id": 980123,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937928722.072, "dur": 9.892, + "args": { + "External id": 980124,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937928734.721, "dur": 59374.673, + "args": { + "External id": 980125,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937988307.305, "dur": 13.588, + "args": { + "External id": 980126,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937988311.830, "dur": 8.606, + "args": { + "External id": 980127,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937988354.758, "dur": 396.607, + "args": { + "External id": 980128,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937988386.709, "dur": 359.031, + "args": { + "External id": 980129,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2720, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937988399.701, "dur": 339.457, + "args": { + "External id": 980130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937988772.736, "dur": 2.601, + "args": { + "External id": 980131,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2722, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937988841.016, "dur": 7.660, + "args": { + "External id": 980132,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937988904.111, "dur": 1.844, + "args": { + "External id": 980133,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937988925.458, "dur": 3.779, + "args": { + "External id": 980134,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937988944.171, "dur": 1.257, + "args": { + "External id": 980135,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937988959.664, "dur": 1.364, + "args": { + "External id": 980136,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937988974.597, "dur": 0.993, + "args": { + "External id": 980137,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937988989.354, "dur": 3.192, + "args": { + "External id": 980138,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937989007.303, "dur": 23.826, + "args": { + "External id": 980139,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937989097.316, "dur": 2.279, + "args": { + "External id": 980140,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937989223.490, "dur": 3382.433, + "args": { + "External id": 980141,"Record function id": 0, "Ev Idx": 2732 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2338708, "tid": 2379421, + "ts": 6345937989247.006, "dur": 1233.142, + "args": { + "External id": 980142,"Record function id": 0, "Ev Idx": 2733 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338708, "tid": 2379421, + "ts": 6345937989265.296, "dur": 375.250, + "args": { + "External id": 980143,"Record function id": 0, "Ev Idx": 2734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937989367.523, "dur": 5.007, + "args": { + "External id": 980144,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937989376.865, "dur": 1.045, + "args": { + "External id": 980145,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937989380.241, "dur": 3.509, + "args": { + "External id": 980146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937989385.785, "dur": 1.200, + "args": { + "External id": 980147,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937989388.659, "dur": 0.931, + "args": { + "External id": 980148,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937989391.599, "dur": 1.039, + "args": { + "External id": 980149,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937989394.830, "dur": 2.389, + "args": { + "External id": 980150,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937989398.985, "dur": 1.060, + "args": { + "External id": 980151,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937989401.620, "dur": 1.019, + "args": { + "External id": 980152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937989404.068, "dur": 0.812, + "args": { + "External id": 980153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937989425.554, "dur": 180.771, + "args": { + "External id": 980154,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937989443.878, "dur": 156.917, + "args": { + "External id": 980155,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937989462.747, "dur": 20.008, + "args": { + "External id": 980156,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937989487.113, "dur": 78.453, + "args": { + "External id": 980157,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937989490.103, "dur": 74.964, + "args": { + "External id": 980158,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937989495.118, "dur": 6.686, + "args": { + "External id": 980159,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937989504.161, "dur": 60.364, + "args": { + "External id": 980160,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2751 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2338708, "tid": 2379421, + "ts": 6345937989732.808, "dur": 736.720, + "args": { + "External id": 980161,"Record function id": 0, "Ev Idx": 2752 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338708, "tid": 2379421, + "ts": 6345937989752.135, "dur": 701.987, + "args": { + "External id": 980162,"Record function id": 0, "Ev Idx": 2753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937989813.440, "dur": 6.314, + "args": { + "External id": 980163,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937989837.725, "dur": 37.854, + "args": { + "External id": 980164,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937989843.467, "dur": 2.038, + "args": { + "External id": 980165,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937989848.227, "dur": 0.758, + "args": { + "External id": 980166,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937989851.356, "dur": 0.484, + "args": { + "External id": 980167,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937989854.545, "dur": 0.345, + "args": { + "External id": 980168,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937989856.956, "dur": 0.647, + "args": { + "External id": 980169,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937989859.634, "dur": 3.148, + "args": { + "External id": 980170,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937989864.827, "dur": 0.543, + "args": { + "External id": 980171,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937989867.605, "dur": 0.467, + "args": { + "External id": 980172,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937989869.998, "dur": 0.838, + "args": { + "External id": 980173,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937989891.506, "dur": 55.296, + "args": { + "External id": 980174,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345937989983.386, "dur": 205.904, + "args": { + "External id": 980175,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937989995.722, "dur": 3.643, + "args": { + "External id": 980176,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345937990005.411, "dur": 34.996, + "args": { + "External id": 980177,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345937990031.404, "dur": 8.469, + "args": { + "External id": 980178,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937990036.390, "dur": 1.224, + "args": { + "External id": 980179,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345937990051.126, "dur": 72.437, + "args": { + "External id": 980180,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937990092.262, "dur": 1.105, + "args": { + "External id": 980181,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937990096.158, "dur": 0.643, + "args": { + "External id": 980182,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937990099.301, "dur": 3.169, + "args": { + "External id": 980183,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937990104.919, "dur": 0.670, + "args": { + "External id": 980184,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937990107.525, "dur": 0.427, + "args": { + "External id": 980185,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937990110.517, "dur": 0.423, + "args": { + "External id": 980186,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937990112.816, "dur": 0.354, + "args": { + "External id": 980187,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937990115.391, "dur": 0.456, + "args": { + "External id": 980188,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937990117.801, "dur": 0.677, + "args": { + "External id": 980189,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937990139.961, "dur": 39.550, + "args": { + "External id": 980190,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345937990246.101, "dur": 128.680, + "args": { + "External id": 980191,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937990273.450, "dur": 97.377, + "args": { + "External id": 980192,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2783, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345937990284.708, "dur": 81.266, + "args": { + "External id": 980193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345937990391.881, "dur": 1.812, + "args": { + "External id": 980194,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2785, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937990488.542, "dur": 2093.915, + "args": { + "External id": 980195,"Sequence number": 10552280, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2786 + } + }, + { + "ph": "f", "id": 186, "pid": 2338708, "tid": 2379421, "ts": 6345937990488.542, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937990617.775, "dur": 118.839, + "args": { + "External id": 980196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937990780.876, "dur": 44.406, + "args": { + "External id": 980197,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345937990846.898, "dur": 57.472, + "args": { + "External id": 980198,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937990918.561, "dur": 38.550, + "args": { + "External id": 980199,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937990966.241, "dur": 40.117, + "args": { + "External id": 980200,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937991040.877, "dur": 82.496, + "args": { + "External id": 980201,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937991137.476, "dur": 39.115, + "args": { + "External id": 980202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937991209.880, "dur": 30.331, + "args": { + "External id": 980203,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345937991261.836, "dur": 33.213, + "args": { + "External id": 980204,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937991319.499, "dur": 24.992, + "args": { + "External id": 980205,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937991358.707, "dur": 18.907, + "args": { + "External id": 980206,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937991389.028, "dur": 46.624, + "args": { + "External id": 980207,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937991440.252, "dur": 37.979, + "args": { + "External id": 980208,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345937991513.991, "dur": 302.443, + "args": { + "External id": 980209,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937991608.977, "dur": 6.941, + "args": { + "External id": 980210,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937991618.462, "dur": 3.090, + "args": { + "External id": 980211,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937991623.204, "dur": 2.361, + "args": { + "External id": 980212,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937991626.928, "dur": 2.441, + "args": { + "External id": 980213,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937991679.496, "dur": 5.961, + "args": { + "External id": 980214,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937991682.073, "dur": 3.154, + "args": { + "External id": 980215,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937991688.151, "dur": 40.198, + "args": { + "External id": 980216,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937991694.896, "dur": 4.180, + "args": { + "External id": 980217,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345937991730.486, "dur": 2.145, + "args": { + "External id": 980218,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937991731.772, "dur": 0.751, + "args": { + "External id": 980219,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345937991734.574, "dur": 19.403, + "args": { + "External id": 980220,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937991737.067, "dur": 1.092, + "args": { + "External id": 980221,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345937991866.981, "dur": 38.325, + "args": { + "External id": 980222,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937991925.676, "dur": 24.445, + "args": { + "External id": 980223,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937991960.101, "dur": 79.118, + "args": { + "External id": 980224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937992051.235, "dur": 108.706, + "args": { + "External id": 980225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937992178.534, "dur": 27.594, + "args": { + "External id": 980226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937992214.359, "dur": 37.433, + "args": { + "External id": 980227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937992261.011, "dur": 33.183, + "args": { + "External id": 980228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345937992303.502, "dur": 38.633, + "args": { + "External id": 980229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345937992366.627, "dur": 29.752, + "args": { + "External id": 980230,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937992415.149, "dur": 27.538, + "args": { + "External id": 980231,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345937992459.126, "dur": 19.843, + "args": { + "External id": 980232,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345937992498.410, "dur": 16.043, + "args": { + "External id": 980233,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345937992529.280, "dur": 18.248, + "args": { + "External id": 980234,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992631.983, "dur": 17.903, + "args": { + "External id": 980235,"Record function id": 0, "Ev Idx": 2826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992635.969, "dur": 12.862, + "args": { + "External id": 980236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992640.667, "dur": 6.792, + "args": { + "External id": 980237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992642.812, "dur": 4.497, + "args": { + "External id": 980238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992654.609, "dur": 5.907, + "args": { + "External id": 980239,"Record function id": 0, "Ev Idx": 2830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992656.446, "dur": 3.376, + "args": { + "External id": 980240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992657.106, "dur": 2.189, + "args": { + "External id": 980241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992657.856, "dur": 1.299, + "args": { + "External id": 980242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992664.638, "dur": 8.259, + "args": { + "External id": 980243,"Record function id": 0, "Ev Idx": 2834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992666.291, "dur": 6.125, + "args": { + "External id": 980244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992666.997, "dur": 4.872, + "args": { + "External id": 980245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992667.657, "dur": 4.114, + "args": { + "External id": 980246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992677.059, "dur": 4.826, + "args": { + "External id": 980247,"Record function id": 0, "Ev Idx": 2838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992678.620, "dur": 2.764, + "args": { + "External id": 980248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992679.395, "dur": 1.500, + "args": { + "External id": 980249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992679.915, "dur": 0.901, + "args": { + "External id": 980250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992685.762, "dur": 5.364, + "args": { + "External id": 980251,"Record function id": 0, "Ev Idx": 2842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992687.224, "dur": 3.419, + "args": { + "External id": 980252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992688.221, "dur": 1.901, + "args": { + "External id": 980253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992689.197, "dur": 0.838, + "args": { + "External id": 980254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992695.182, "dur": 4.887, + "args": { + "External id": 980255,"Record function id": 0, "Ev Idx": 2846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992696.795, "dur": 2.783, + "args": { + "External id": 980256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992697.429, "dur": 1.619, + "args": { + "External id": 980257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992698.169, "dur": 0.736, + "args": { + "External id": 980258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992704.162, "dur": 4.800, + "args": { + "External id": 980259,"Record function id": 0, "Ev Idx": 2850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992705.701, "dur": 2.761, + "args": { + "External id": 980260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992706.391, "dur": 1.546, + "args": { + "External id": 980261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992706.929, "dur": 0.893, + "args": { + "External id": 980262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992712.872, "dur": 4.830, + "args": { + "External id": 980263,"Record function id": 0, "Ev Idx": 2854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992714.733, "dur": 2.471, + "args": { + "External id": 980264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992715.439, "dur": 1.270, + "args": { + "External id": 980265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992715.883, "dur": 0.738, + "args": { + "External id": 980266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 2857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992721.469, "dur": 5.370, + "args": { + "External id": 980267,"Record function id": 0, "Ev Idx": 2858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345937992722.975, "dur": 3.369, + "args": { + "External id": 980268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992723.673, "dur": 2.189, + "args": { + "External id": 980269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345937992724.659, "dur": 1.097, + "args": { + "External id": 980270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 2861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937992731.426, "dur": 63550.390, + "args": { + "External id": 980271,"Record function id": 0, "Sequence number": 10552279, "Fwd thread id": 1, "Ev Idx": 2862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345937992732.902, "dur": 63538.823, + "args": { + "External id": 980272,"Sequence number": 10552279, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2863 + } + }, + { + "ph": "f", "id": 187, "pid": 2338708, "tid": 2379421, "ts": 6345937992732.902, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2338708, "tid": 2379421, + "ts": 6345937992766.911, "dur": 44.934, + "args": { + "External id": 980273,"Record function id": 0, "Ev Idx": 2864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2338708, "tid": 2379421, + "ts": 6345937992820.836, "dur": 77.651, + "args": { + "External id": 980274,"Record function id": 0, "Ev Idx": 2865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2338708, "tid": 2379421, + "ts": 6345937992905.794, "dur": 63355.963, + "args": { + "External id": 980275,"Record function id": 0, "Ev Idx": 2866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937993032.483, "dur": 8.913, + "args": { + "External id": 980276,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345937993094.003, "dur": 8.484, + "args": { + "External id": 980277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937993123.394, "dur": 62144.742, + "args": { + "External id": 980278,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345937993140.033, "dur": 62113.258, + "args": { + "External id": 980279,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 2870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345937993275.449, "dur": 21.297, + "args": { + "External id": 980280,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345937993320.531, "dur": 61880.697, + "args": { + "External id": 980281,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 2872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345937993324.043, "dur": 61876.245, + "args": { + "External id": 980282,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 2873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345937993329.699, "dur": 12.172, + "args": { + "External id": 980283,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345937993344.373, "dur": 61850.284, + "args": { + "External id": 980284,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 2875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938055391.647, "dur": 15.029, + "args": { + "External id": 980285,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 2876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938055396.402, "dur": 9.720, + "args": { + "External id": 980286,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938055440.913, "dur": 400.339, + "args": { + "External id": 980287,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 2878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938055472.107, "dur": 362.870, + "args": { + "External id": 980288,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2879, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938055485.188, "dur": 343.128, + "args": { + "External id": 980289,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 2880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938055863.443, "dur": 2.537, + "args": { + "External id": 980290,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2881, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938055933.124, "dur": 7.298, + "args": { + "External id": 980291,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938055994.116, "dur": 1.678, + "args": { + "External id": 980292,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056040.167, "dur": 4.436, + "args": { + "External id": 980293,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056102.363, "dur": 1.970, + "args": { + "External id": 980294,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056120.284, "dur": 1.090, + "args": { + "External id": 980295,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056135.423, "dur": 1.213, + "args": { + "External id": 980296,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056150.876, "dur": 3.957, + "args": { + "External id": 980297,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056170.423, "dur": 2.553, + "args": { + "External id": 980298,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056186.769, "dur": 1.027, + "args": { + "External id": 980299,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938056301.051, "dur": 3398.450, + "args": { + "External id": 980300,"Record function id": 0, "Ev Idx": 2891 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2338708, "tid": 2379421, + "ts": 6345938056323.470, "dur": 1235.977, + "args": { + "External id": 980301,"Record function id": 0, "Ev Idx": 2892 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338708, "tid": 2379421, + "ts": 6345938056339.828, "dur": 388.671, + "args": { + "External id": 980302,"Record function id": 0, "Ev Idx": 2893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938056439.897, "dur": 4.919, + "args": { + "External id": 980303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 2894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938056448.410, "dur": 1.420, + "args": { + "External id": 980304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938056451.944, "dur": 3.064, + "args": { + "External id": 980305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938056457.243, "dur": 1.227, + "args": { + "External id": 980306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938056460.160, "dur": 0.803, + "args": { + "External id": 980307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938056462.887, "dur": 1.137, + "args": { + "External id": 980308,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 2899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938056466.384, "dur": 2.298, + "args": { + "External id": 980309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 2900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938056470.396, "dur": 1.447, + "args": { + "External id": 980310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938056473.603, "dur": 1.014, + "args": { + "External id": 980311,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938056476.251, "dur": 1.055, + "args": { + "External id": 980312,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 2903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938056511.858, "dur": 179.658, + "args": { + "External id": 980313,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938056530.659, "dur": 155.052, + "args": { + "External id": 980314,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 2905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938056549.835, "dur": 19.571, + "args": { + "External id": 980315,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938056573.836, "dur": 77.242, + "args": { + "External id": 980316,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 2907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938056577.111, "dur": 73.462, + "args": { + "External id": 980317,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 2908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056582.347, "dur": 6.127, + "args": { + "External id": 980318,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938056590.664, "dur": 59.388, + "args": { + "External id": 980319,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 2910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2338708, "tid": 2379421, + "ts": 6345938056822.818, "dur": 727.331, + "args": { + "External id": 980320,"Record function id": 0, "Ev Idx": 2911 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338708, "tid": 2379421, + "ts": 6345938056843.785, "dur": 691.039, + "args": { + "External id": 980321,"Record function id": 0, "Ev Idx": 2912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938056906.101, "dur": 5.480, + "args": { + "External id": 980322,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938056929.675, "dur": 38.354, + "args": { + "External id": 980323,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056935.891, "dur": 1.594, + "args": { + "External id": 980324,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056939.967, "dur": 0.724, + "args": { + "External id": 980325,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056942.863, "dur": 0.622, + "args": { + "External id": 980326,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056945.946, "dur": 0.841, + "args": { + "External id": 980327,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056949.448, "dur": 1.069, + "args": { + "External id": 980328,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056952.689, "dur": 2.998, + "args": { + "External id": 980329,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056957.452, "dur": 0.466, + "args": { + "External id": 980330,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056959.892, "dur": 0.358, + "args": { + "External id": 980331,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938056962.362, "dur": 0.441, + "args": { + "External id": 980332,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938056978.765, "dur": 72.837, + "args": { + "External id": 980333,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938057131.493, "dur": 139.901, + "args": { + "External id": 980334,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 2925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938057145.270, "dur": 6.392, + "args": { + "External id": 980335,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938057158.336, "dur": 14.295, + "args": { + "External id": 980336,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938057163.826, "dur": 8.306, + "args": { + "External id": 980337,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 2928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938057169.128, "dur": 0.952, + "args": { + "External id": 980338,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 2929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938057181.621, "dur": 31.265, + "args": { + "External id": 980339,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 2930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938057184.708, "dur": 0.678, + "args": { + "External id": 980340,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938057187.524, "dur": 0.463, + "args": { + "External id": 980341,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938057189.976, "dur": 2.892, + "args": { + "External id": 980342,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938057194.758, "dur": 0.445, + "args": { + "External id": 980343,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938057197.258, "dur": 0.866, + "args": { + "External id": 980344,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938057199.831, "dur": 0.359, + "args": { + "External id": 980345,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938057202.047, "dur": 0.610, + "args": { + "External id": 980346,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938057205.126, "dur": 0.485, + "args": { + "External id": 980347,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938057207.709, "dur": 0.489, + "args": { + "External id": 980348,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 2939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938057225.153, "dur": 36.799, + "args": { + "External id": 980349,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 2940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938057328.484, "dur": 130.801, + "args": { + "External id": 980350,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 2941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938057354.271, "dur": 100.699, + "args": { + "External id": 980351,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2942, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938057366.060, "dur": 84.127, + "args": { + "External id": 980352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 2943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938057475.554, "dur": 2.198, + "args": { + "External id": 980353,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2944, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938057567.987, "dur": 2105.530, + "args": { + "External id": 980354,"Sequence number": 10552278, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 2945 + } + }, + { + "ph": "f", "id": 188, "pid": 2338708, "tid": 2379421, "ts": 6345938057567.987, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938057696.467, "dur": 117.905, + "args": { + "External id": 980355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 2946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938057857.768, "dur": 44.796, + "args": { + "External id": 980356,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 2947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938057923.966, "dur": 57.153, + "args": { + "External id": 980357,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 2948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938057995.933, "dur": 107.087, + "args": { + "External id": 980358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938058117.249, "dur": 45.560, + "args": { + "External id": 980359,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938058171.952, "dur": 36.417, + "args": { + "External id": 980360,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 2951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938058217.579, "dur": 34.998, + "args": { + "External id": 980361,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 2952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938058283.973, "dur": 30.475, + "args": { + "External id": 980362,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 2953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938058358.356, "dur": 34.827, + "args": { + "External id": 980363,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938058420.415, "dur": 25.063, + "args": { + "External id": 980364,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938058462.102, "dur": 17.756, + "args": { + "External id": 980365,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938058490.172, "dur": 46.072, + "args": { + "External id": 980366,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938058541.031, "dur": 39.327, + "args": { + "External id": 980367,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938058616.748, "dur": 329.547, + "args": { + "External id": 980368,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938058711.074, "dur": 10.506, + "args": { + "External id": 980369,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938058723.951, "dur": 2.988, + "args": { + "External id": 980370,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938058735.101, "dur": 2.246, + "args": { + "External id": 980371,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938058739.033, "dur": 2.268, + "args": { + "External id": 980372,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938058795.046, "dur": 6.140, + "args": { + "External id": 980373,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938058797.715, "dur": 3.139, + "args": { + "External id": 980374,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938058803.776, "dur": 39.019, + "args": { + "External id": 980375,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938058810.228, "dur": 3.847, + "args": { + "External id": 980376,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938058844.860, "dur": 2.371, + "args": { + "External id": 980377,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938058846.381, "dur": 0.734, + "args": { + "External id": 980378,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 2969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938058849.025, "dur": 36.758, + "args": { + "External id": 980379,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 2970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938058851.703, "dur": 0.694, + "args": { + "External id": 980380,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 2971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938058989.569, "dur": 53.999, + "args": { + "External id": 980381,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938059108.716, "dur": 25.629, + "args": { + "External id": 980382,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938059151.103, "dur": 60.976, + "args": { + "External id": 980383,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938059220.912, "dur": 47.586, + "args": { + "External id": 980384,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938059281.414, "dur": 27.525, + "args": { + "External id": 980385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 2976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938059316.339, "dur": 37.622, + "args": { + "External id": 980386,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 2977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938059363.243, "dur": 34.436, + "args": { + "External id": 980387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 2978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938059406.435, "dur": 36.638, + "args": { + "External id": 980388,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 2979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938059463.608, "dur": 28.612, + "args": { + "External id": 980389,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 2980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938059509.890, "dur": 28.648, + "args": { + "External id": 980390,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938059554.303, "dur": 20.666, + "args": { + "External id": 980391,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 2982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938059592.505, "dur": 16.452, + "args": { + "External id": 980392,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 2983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938059622.554, "dur": 18.028, + "args": { + "External id": 980393,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 2984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059725.313, "dur": 19.172, + "args": { + "External id": 980394,"Record function id": 0, "Ev Idx": 2985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059729.618, "dur": 13.918, + "args": { + "External id": 980395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059734.808, "dur": 7.602, + "args": { + "External id": 980396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059737.074, "dur": 5.192, + "args": { + "External id": 980397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059749.670, "dur": 6.557, + "args": { + "External id": 980398,"Record function id": 0, "Ev Idx": 2989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059751.394, "dur": 4.324, + "args": { + "External id": 980399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059752.527, "dur": 2.658, + "args": { + "External id": 980400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059753.865, "dur": 1.194, + "args": { + "External id": 980401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 2992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059760.510, "dur": 7.637, + "args": { + "External id": 980402,"Record function id": 0, "Ev Idx": 2993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059762.291, "dur": 5.367, + "args": { + "External id": 980403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059763.019, "dur": 4.142, + "args": { + "External id": 980404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059763.373, "dur": 3.702, + "args": { + "External id": 980405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 2996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059772.328, "dur": 4.685, + "args": { + "External id": 980406,"Record function id": 0, "Ev Idx": 2997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059773.741, "dur": 2.797, + "args": { + "External id": 980407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059774.394, "dur": 1.667, + "args": { + "External id": 980408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 2999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059774.997, "dur": 0.982, + "args": { + "External id": 980409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059780.800, "dur": 4.861, + "args": { + "External id": 980410,"Record function id": 0, "Ev Idx": 3001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059782.187, "dur": 2.991, + "args": { + "External id": 980411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059783.007, "dur": 1.710, + "args": { + "External id": 980412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059783.687, "dur": 0.941, + "args": { + "External id": 980413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059789.764, "dur": 4.704, + "args": { + "External id": 980414,"Record function id": 0, "Ev Idx": 3005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059791.060, "dur": 2.913, + "args": { + "External id": 980415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059791.799, "dur": 1.677, + "args": { + "External id": 980416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059792.396, "dur": 0.951, + "args": { + "External id": 980417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059798.949, "dur": 4.599, + "args": { + "External id": 980418,"Record function id": 0, "Ev Idx": 3009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059800.493, "dur": 2.564, + "args": { + "External id": 980419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059800.994, "dur": 1.559, + "args": { + "External id": 980420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059801.551, "dur": 0.891, + "args": { + "External id": 980421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059807.258, "dur": 4.690, + "args": { + "External id": 980422,"Record function id": 0, "Ev Idx": 3013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059809.116, "dur": 2.365, + "args": { + "External id": 980423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059809.845, "dur": 1.183, + "args": { + "External id": 980424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059810.201, "dur": 0.737, + "args": { + "External id": 980425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059815.712, "dur": 4.542, + "args": { + "External id": 980426,"Record function id": 0, "Ev Idx": 3017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938059817.154, "dur": 2.608, + "args": { + "External id": 980427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059817.753, "dur": 1.545, + "args": { + "External id": 980428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938059818.234, "dur": 0.959, + "args": { + "External id": 980429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938059825.042, "dur": 64131.975, + "args": { + "External id": 980430,"Record function id": 0, "Sequence number": 10552277, "Fwd thread id": 1, "Ev Idx": 3021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938059826.487, "dur": 64120.517, + "args": { + "External id": 980431,"Sequence number": 10552277, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3022 + } + }, + { + "ph": "f", "id": 189, "pid": 2338708, "tid": 2379421, "ts": 6345938059826.487, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2338708, "tid": 2379421, + "ts": 6345938059857.993, "dur": 41.864, + "args": { + "External id": 980432,"Record function id": 0, "Ev Idx": 3023 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2338708, "tid": 2379421, + "ts": 6345938059909.135, "dur": 72.572, + "args": { + "External id": 980433,"Record function id": 0, "Ev Idx": 3024 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2338708, "tid": 2379421, + "ts": 6345938059988.447, "dur": 63949.210, + "args": { + "External id": 980434,"Record function id": 0, "Ev Idx": 3025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938060149.931, "dur": 9.799, + "args": { + "External id": 980435,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938060173.402, "dur": 7.962, + "args": { + "External id": 980436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938060199.311, "dur": 62724.812, + "args": { + "External id": 980437,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938060215.527, "dur": 62693.992, + "args": { + "External id": 980438,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938060328.992, "dur": 20.030, + "args": { + "External id": 980439,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938060376.153, "dur": 62480.903, + "args": { + "External id": 980440,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938060379.712, "dur": 62476.183, + "args": { + "External id": 980441,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938060385.050, "dur": 9.933, + "args": { + "External id": 980442,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938060397.229, "dur": 62452.942, + "args": { + "External id": 980443,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938123089.306, "dur": 15.377, + "args": { + "External id": 980444,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938123093.864, "dur": 10.004, + "args": { + "External id": 980445,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938123141.188, "dur": 450.045, + "args": { + "External id": 980446,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938123172.577, "dur": 412.249, + "args": { + "External id": 980447,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3038, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938123186.276, "dur": 392.057, + "args": { + "External id": 980448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938123613.156, "dur": 2.742, + "args": { + "External id": 980449,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3040, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938123687.376, "dur": 7.877, + "args": { + "External id": 980450,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938123747.416, "dur": 1.782, + "args": { + "External id": 980451,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938123769.253, "dur": 3.438, + "args": { + "External id": 980452,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938123787.283, "dur": 1.656, + "args": { + "External id": 980453,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938123803.109, "dur": 0.969, + "args": { + "External id": 980454,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938123818.356, "dur": 1.516, + "args": { + "External id": 980455,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938123834.587, "dur": 3.518, + "args": { + "External id": 980456,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938123852.648, "dur": 2.589, + "args": { + "External id": 980457,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938123868.549, "dur": 1.145, + "args": { + "External id": 980458,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938123973.456, "dur": 3448.816, + "args": { + "External id": 980459,"Record function id": 0, "Ev Idx": 3050 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2338708, "tid": 2379421, + "ts": 6345938123995.189, "dur": 1327.045, + "args": { + "External id": 980460,"Record function id": 0, "Ev Idx": 3051 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338708, "tid": 2379421, + "ts": 6345938124034.403, "dur": 437.912, + "args": { + "External id": 980461,"Record function id": 0, "Ev Idx": 3052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938124177.188, "dur": 6.313, + "args": { + "External id": 980462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938124187.357, "dur": 1.583, + "args": { + "External id": 980463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938124191.122, "dur": 3.303, + "args": { + "External id": 980464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938124196.939, "dur": 1.061, + "args": { + "External id": 980465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938124199.785, "dur": 1.150, + "args": { + "External id": 980466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938124202.795, "dur": 1.363, + "args": { + "External id": 980467,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938124206.307, "dur": 2.283, + "args": { + "External id": 980468,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938124210.264, "dur": 1.383, + "args": { + "External id": 980469,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938124213.449, "dur": 1.151, + "args": { + "External id": 980470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938124216.338, "dur": 1.125, + "args": { + "External id": 980471,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938124254.551, "dur": 180.574, + "args": { + "External id": 980472,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938124274.246, "dur": 155.496, + "args": { + "External id": 980473,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938124293.578, "dur": 19.832, + "args": { + "External id": 980474,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938124317.638, "dur": 77.800, + "args": { + "External id": 980475,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938124320.984, "dur": 74.055, + "args": { + "External id": 980476,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124326.156, "dur": 6.292, + "args": { + "External id": 980477,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938124334.747, "dur": 59.618, + "args": { + "External id": 980478,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3069 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2338708, "tid": 2379421, + "ts": 6345938124560.721, "dur": 751.978, + "args": { + "External id": 980479,"Record function id": 0, "Ev Idx": 3070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338708, "tid": 2379421, + "ts": 6345938124581.352, "dur": 716.274, + "args": { + "External id": 980480,"Record function id": 0, "Ev Idx": 3071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938124675.890, "dur": 17.235, + "args": { + "External id": 980481,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938124711.039, "dur": 35.753, + "args": { + "External id": 980482,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124716.793, "dur": 2.042, + "args": { + "External id": 980483,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124721.489, "dur": 0.531, + "args": { + "External id": 980484,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124724.033, "dur": 0.443, + "args": { + "External id": 980485,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124726.286, "dur": 0.484, + "args": { + "External id": 980486,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124728.573, "dur": 0.552, + "args": { + "External id": 980487,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124731.046, "dur": 2.873, + "args": { + "External id": 980488,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124736.374, "dur": 0.585, + "args": { + "External id": 980489,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124738.811, "dur": 0.573, + "args": { + "External id": 980490,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124741.193, "dur": 0.468, + "args": { + "External id": 980491,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938124757.612, "dur": 52.822, + "args": { + "External id": 980492,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938124846.781, "dur": 126.793, + "args": { + "External id": 980493,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938124858.416, "dur": 3.869, + "args": { + "External id": 980494,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938124867.850, "dur": 11.857, + "args": { + "External id": 980495,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938124872.786, "dur": 6.453, + "args": { + "External id": 980496,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124877.044, "dur": 0.644, + "args": { + "External id": 980497,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938124888.194, "dur": 27.526, + "args": { + "External id": 980498,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124890.478, "dur": 0.517, + "args": { + "External id": 980499,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124893.248, "dur": 0.485, + "args": { + "External id": 980500,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124895.685, "dur": 2.548, + "args": { + "External id": 980501,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124900.123, "dur": 0.443, + "args": { + "External id": 980502,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124902.246, "dur": 0.483, + "args": { + "External id": 980503,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124904.464, "dur": 0.455, + "args": { + "External id": 980504,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124906.849, "dur": 0.434, + "args": { + "External id": 980505,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124908.764, "dur": 0.446, + "args": { + "External id": 980506,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938124910.864, "dur": 0.484, + "args": { + "External id": 980507,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938124929.334, "dur": 34.922, + "args": { + "External id": 980508,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938125045.910, "dur": 170.831, + "args": { + "External id": 980509,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938125108.853, "dur": 103.493, + "args": { + "External id": 980510,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3101, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938125121.081, "dur": 86.391, + "args": { + "External id": 980511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938125233.182, "dur": 2.097, + "args": { + "External id": 980512,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3103, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938125330.612, "dur": 2067.249, + "args": { + "External id": 980513,"Sequence number": 10552276, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3104 + } + }, + { + "ph": "f", "id": 190, "pid": 2338708, "tid": 2379421, "ts": 6345938125330.612, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938125457.580, "dur": 119.584, + "args": { + "External id": 980514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938125637.379, "dur": 46.945, + "args": { + "External id": 980515,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938125706.256, "dur": 60.603, + "args": { + "External id": 980516,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938125781.370, "dur": 39.369, + "args": { + "External id": 980517,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938125829.978, "dur": 40.115, + "args": { + "External id": 980518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938125879.308, "dur": 35.208, + "args": { + "External id": 980519,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938125924.106, "dur": 34.852, + "args": { + "External id": 980520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938125986.848, "dur": 48.066, + "args": { + "External id": 980521,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938126098.223, "dur": 37.524, + "args": { + "External id": 980522,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938126163.272, "dur": 24.696, + "args": { + "External id": 980523,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938126202.571, "dur": 18.448, + "args": { + "External id": 980524,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938126232.163, "dur": 55.741, + "args": { + "External id": 980525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938126292.858, "dur": 40.065, + "args": { + "External id": 980526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938126371.000, "dur": 317.220, + "args": { + "External id": 980527,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938126463.374, "dur": 6.789, + "args": { + "External id": 980528,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938126472.530, "dur": 3.195, + "args": { + "External id": 980529,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938126477.419, "dur": 2.114, + "args": { + "External id": 980530,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938126480.858, "dur": 1.830, + "args": { + "External id": 980531,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938126552.068, "dur": 6.448, + "args": { + "External id": 980532,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938126554.452, "dur": 3.465, + "args": { + "External id": 980533,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938126560.885, "dur": 41.450, + "args": { + "External id": 980534,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938126567.625, "dur": 4.240, + "args": { + "External id": 980535,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938126604.444, "dur": 1.983, + "args": { + "External id": 980536,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938126605.585, "dur": 0.737, + "args": { + "External id": 980537,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938126607.894, "dur": 18.517, + "args": { + "External id": 980538,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938126610.022, "dur": 0.675, + "args": { + "External id": 980539,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938126730.367, "dur": 31.786, + "args": { + "External id": 980540,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938126781.664, "dur": 18.125, + "args": { + "External id": 980541,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938126809.461, "dur": 46.704, + "args": { + "External id": 980542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938126864.680, "dur": 45.354, + "args": { + "External id": 980543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938126923.127, "dur": 25.882, + "args": { + "External id": 980544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938126956.452, "dur": 35.850, + "args": { + "External id": 980545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938127000.480, "dur": 92.086, + "args": { + "External id": 980546,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938127106.467, "dur": 45.417, + "args": { + "External id": 980547,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938127177.773, "dur": 29.417, + "args": { + "External id": 980548,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938127225.314, "dur": 29.561, + "args": { + "External id": 980549,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938127270.941, "dur": 22.226, + "args": { + "External id": 980550,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938127311.255, "dur": 18.374, + "args": { + "External id": 980551,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938127344.261, "dur": 18.853, + "args": { + "External id": 980552,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127447.741, "dur": 16.870, + "args": { + "External id": 980553,"Record function id": 0, "Ev Idx": 3144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127451.676, "dur": 11.909, + "args": { + "External id": 980554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127456.237, "dur": 6.351, + "args": { + "External id": 980555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127458.104, "dur": 4.335, + "args": { + "External id": 980556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127469.203, "dur": 27.972, + "args": { + "External id": 980557,"Record function id": 0, "Ev Idx": 3148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127470.890, "dur": 25.723, + "args": { + "External id": 980558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127471.640, "dur": 24.401, + "args": { + "External id": 980559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127494.766, "dur": 1.085, + "args": { + "External id": 980560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127501.549, "dur": 7.738, + "args": { + "External id": 980561,"Record function id": 0, "Ev Idx": 3152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127502.952, "dur": 5.840, + "args": { + "External id": 980562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127503.938, "dur": 4.375, + "args": { + "External id": 980563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127504.372, "dur": 3.857, + "args": { + "External id": 980564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127513.298, "dur": 4.458, + "args": { + "External id": 980565,"Record function id": 0, "Ev Idx": 3156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127514.361, "dur": 2.889, + "args": { + "External id": 980566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127515.034, "dur": 1.754, + "args": { + "External id": 980567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127515.604, "dur": 1.103, + "args": { + "External id": 980568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127521.541, "dur": 4.467, + "args": { + "External id": 980569,"Record function id": 0, "Ev Idx": 3160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127522.857, "dur": 2.634, + "args": { + "External id": 980570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127523.591, "dur": 1.439, + "args": { + "External id": 980571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127524.225, "dur": 0.717, + "args": { + "External id": 980572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127529.948, "dur": 4.362, + "args": { + "External id": 980573,"Record function id": 0, "Ev Idx": 3164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127531.496, "dur": 2.313, + "args": { + "External id": 980574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127532.031, "dur": 1.289, + "args": { + "External id": 980575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127532.491, "dur": 0.715, + "args": { + "External id": 980576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127538.120, "dur": 5.741, + "args": { + "External id": 980577,"Record function id": 0, "Ev Idx": 3168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127539.171, "dur": 4.187, + "args": { + "External id": 980578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127539.866, "dur": 2.939, + "args": { + "External id": 980579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127542.016, "dur": 0.699, + "args": { + "External id": 980580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127547.545, "dur": 3.959, + "args": { + "External id": 980581,"Record function id": 0, "Ev Idx": 3172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127548.719, "dur": 2.305, + "args": { + "External id": 980582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127549.279, "dur": 1.243, + "args": { + "External id": 980583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127549.618, "dur": 0.820, + "args": { + "External id": 980584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127555.190, "dur": 3.886, + "args": { + "External id": 980585,"Record function id": 0, "Ev Idx": 3176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938127556.400, "dur": 2.155, + "args": { + "External id": 980586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127556.914, "dur": 1.171, + "args": { + "External id": 980587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938127557.234, "dur": 0.763, + "args": { + "External id": 980588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938127563.930, "dur": 64753.883, + "args": { + "External id": 980589,"Record function id": 0, "Sequence number": 10552275, "Fwd thread id": 1, "Ev Idx": 3180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938127565.561, "dur": 64742.042, + "args": { + "External id": 980590,"Sequence number": 10552275, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3181 + } + }, + { + "ph": "f", "id": 191, "pid": 2338708, "tid": 2379421, "ts": 6345938127565.561, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2338708, "tid": 2379421, + "ts": 6345938127601.888, "dur": 45.048, + "args": { + "External id": 980591,"Record function id": 0, "Ev Idx": 3182 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2338708, "tid": 2379421, + "ts": 6345938127655.991, "dur": 73.881, + "args": { + "External id": 980592,"Record function id": 0, "Ev Idx": 3183 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2338708, "tid": 2379421, + "ts": 6345938127736.974, "dur": 64561.003, + "args": { + "External id": 980593,"Record function id": 0, "Ev Idx": 3184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938127839.786, "dur": 7.613, + "args": { + "External id": 980594,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938127858.916, "dur": 7.421, + "args": { + "External id": 980595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938127882.776, "dur": 63441.887, + "args": { + "External id": 980596,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938127898.994, "dur": 63410.782, + "args": { + "External id": 980597,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938128003.932, "dur": 42.849, + "args": { + "External id": 980598,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938128114.506, "dur": 63142.624, + "args": { + "External id": 980599,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938128118.356, "dur": 63137.713, + "args": { + "External id": 980600,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938128124.231, "dur": 15.014, + "args": { + "External id": 980601,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938128142.081, "dur": 63108.628, + "args": { + "External id": 980602,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938191448.293, "dur": 13.291, + "args": { + "External id": 980603,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938191452.561, "dur": 8.648, + "args": { + "External id": 980604,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938191497.364, "dur": 398.651, + "args": { + "External id": 980605,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938191529.759, "dur": 360.495, + "args": { + "External id": 980606,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3197, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938191543.201, "dur": 339.590, + "args": { + "External id": 980607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938191916.817, "dur": 2.759, + "args": { + "External id": 980608,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3199, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938191987.476, "dur": 7.464, + "args": { + "External id": 980609,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192105.042, "dur": 3.330, + "args": { + "External id": 980610,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192128.537, "dur": 3.957, + "args": { + "External id": 980611,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192146.969, "dur": 1.105, + "args": { + "External id": 980612,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192161.692, "dur": 1.062, + "args": { + "External id": 980613,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192175.431, "dur": 1.155, + "args": { + "External id": 980614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192189.275, "dur": 3.092, + "args": { + "External id": 980615,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192205.840, "dur": 2.728, + "args": { + "External id": 980616,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192222.560, "dur": 1.129, + "args": { + "External id": 980617,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938192336.948, "dur": 3320.640, + "args": { + "External id": 980618,"Record function id": 0, "Ev Idx": 3209 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2338708, "tid": 2379421, + "ts": 6345938192359.561, "dur": 1217.950, + "args": { + "External id": 980619,"Record function id": 0, "Ev Idx": 3210 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338708, "tid": 2379421, + "ts": 6345938192376.007, "dur": 387.982, + "args": { + "External id": 980620,"Record function id": 0, "Ev Idx": 3211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938192473.766, "dur": 4.750, + "args": { + "External id": 980621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938192482.024, "dur": 1.227, + "args": { + "External id": 980622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938192485.385, "dur": 3.090, + "args": { + "External id": 980623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938192490.497, "dur": 1.054, + "args": { + "External id": 980624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938192493.611, "dur": 0.807, + "args": { + "External id": 980625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938192496.335, "dur": 1.012, + "args": { + "External id": 980626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938192499.257, "dur": 2.034, + "args": { + "External id": 980627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938192502.964, "dur": 1.186, + "args": { + "External id": 980628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938192505.998, "dur": 1.093, + "args": { + "External id": 980629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938192508.699, "dur": 0.761, + "args": { + "External id": 980630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938192529.831, "dur": 197.876, + "args": { + "External id": 980631,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938192561.982, "dur": 160.298, + "args": { + "External id": 980632,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938192580.637, "dur": 19.972, + "args": { + "External id": 980633,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938192605.013, "dur": 80.509, + "args": { + "External id": 980634,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938192608.284, "dur": 76.745, + "args": { + "External id": 980635,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192613.151, "dur": 6.677, + "args": { + "External id": 980636,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938192622.061, "dur": 62.172, + "args": { + "External id": 980637,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3228 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2338708, "tid": 2379421, + "ts": 6345938192853.489, "dur": 713.331, + "args": { + "External id": 980638,"Record function id": 0, "Ev Idx": 3229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338708, "tid": 2379421, + "ts": 6345938192873.108, "dur": 679.022, + "args": { + "External id": 980639,"Record function id": 0, "Ev Idx": 3230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938192937.355, "dur": 6.151, + "args": { + "External id": 980640,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938192961.436, "dur": 34.748, + "args": { + "External id": 980641,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192967.836, "dur": 1.679, + "args": { + "External id": 980642,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192971.781, "dur": 0.687, + "args": { + "External id": 980643,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192974.502, "dur": 0.666, + "args": { + "External id": 980644,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192977.442, "dur": 0.552, + "args": { + "External id": 980645,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192979.706, "dur": 0.472, + "args": { + "External id": 980646,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192981.955, "dur": 2.515, + "args": { + "External id": 980647,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192986.166, "dur": 0.433, + "args": { + "External id": 980648,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192988.535, "dur": 0.368, + "args": { + "External id": 980649,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938192990.806, "dur": 0.411, + "args": { + "External id": 980650,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938193025.129, "dur": 93.723, + "args": { + "External id": 980651,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938193161.152, "dur": 132.070, + "args": { + "External id": 980652,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938193174.664, "dur": 5.496, + "args": { + "External id": 980653,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938193186.412, "dur": 13.040, + "args": { + "External id": 980654,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938193191.435, "dur": 7.522, + "args": { + "External id": 980655,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938193196.042, "dur": 1.100, + "args": { + "External id": 980656,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938193208.106, "dur": 28.114, + "args": { + "External id": 980657,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938193210.787, "dur": 0.636, + "args": { + "External id": 980658,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938193213.166, "dur": 0.485, + "args": { + "External id": 980659,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938193215.585, "dur": 3.051, + "args": { + "External id": 980660,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938193220.689, "dur": 0.479, + "args": { + "External id": 980661,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938193222.685, "dur": 0.604, + "args": { + "External id": 980662,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938193224.693, "dur": 0.425, + "args": { + "External id": 980663,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938193226.599, "dur": 0.421, + "args": { + "External id": 980664,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938193228.717, "dur": 0.466, + "args": { + "External id": 980665,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938193230.924, "dur": 0.393, + "args": { + "External id": 980666,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938193250.185, "dur": 34.072, + "args": { + "External id": 980667,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938193348.937, "dur": 127.654, + "args": { + "External id": 980668,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938193374.052, "dur": 98.199, + "args": { + "External id": 980669,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3260, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938193386.003, "dur": 81.553, + "args": { + "External id": 980670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938193492.377, "dur": 2.257, + "args": { + "External id": 980671,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3262, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938193586.281, "dur": 2049.602, + "args": { + "External id": 980672,"Sequence number": 10552274, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3263 + } + }, + { + "ph": "f", "id": 192, "pid": 2338708, "tid": 2379421, "ts": 6345938193586.281, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938193710.851, "dur": 118.342, + "args": { + "External id": 980673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938193872.756, "dur": 44.615, + "args": { + "External id": 980674,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938193937.604, "dur": 58.581, + "args": { + "External id": 980675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938194032.823, "dur": 88.843, + "args": { + "External id": 980676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938194135.496, "dur": 43.167, + "args": { + "External id": 980677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938194188.577, "dur": 35.960, + "args": { + "External id": 980678,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938194233.823, "dur": 35.817, + "args": { + "External id": 980679,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938194301.265, "dur": 28.895, + "args": { + "External id": 980680,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938194350.672, "dur": 33.384, + "args": { + "External id": 980681,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938194409.644, "dur": 24.062, + "args": { + "External id": 980682,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938194448.679, "dur": 19.927, + "args": { + "External id": 980683,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938194478.532, "dur": 43.601, + "args": { + "External id": 980684,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938194526.682, "dur": 37.891, + "args": { + "External id": 980685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938194600.490, "dur": 284.317, + "args": { + "External id": 980686,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938194691.607, "dur": 7.208, + "args": { + "External id": 980687,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938194701.637, "dur": 3.190, + "args": { + "External id": 980688,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938194706.287, "dur": 1.835, + "args": { + "External id": 980689,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938194709.446, "dur": 1.964, + "args": { + "External id": 980690,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938194765.681, "dur": 5.499, + "args": { + "External id": 980691,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938194767.911, "dur": 3.043, + "args": { + "External id": 980692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938194773.450, "dur": 38.258, + "args": { + "External id": 980693,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938194780.101, "dur": 4.003, + "args": { + "External id": 980694,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938194813.845, "dur": 2.378, + "args": { + "External id": 980695,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938194815.249, "dur": 0.859, + "args": { + "External id": 980696,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938194818.081, "dur": 17.114, + "args": { + "External id": 980697,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938194820.177, "dur": 0.777, + "args": { + "External id": 980698,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938194936.842, "dur": 39.392, + "args": { + "External id": 980699,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938194998.527, "dur": 42.245, + "args": { + "External id": 980700,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938195100.027, "dur": 65.505, + "args": { + "External id": 980701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938195175.855, "dur": 46.572, + "args": { + "External id": 980702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938195236.478, "dur": 27.855, + "args": { + "External id": 980703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938195272.334, "dur": 37.501, + "args": { + "External id": 980704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938195319.441, "dur": 33.537, + "args": { + "External id": 980705,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938195361.191, "dur": 35.481, + "args": { + "External id": 980706,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938195419.531, "dur": 27.001, + "args": { + "External id": 980707,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938195464.199, "dur": 30.151, + "args": { + "External id": 980708,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938195509.734, "dur": 21.363, + "args": { + "External id": 980709,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938195549.092, "dur": 19.315, + "args": { + "External id": 980710,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938195582.040, "dur": 19.295, + "args": { + "External id": 980711,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195683.454, "dur": 17.467, + "args": { + "External id": 980712,"Record function id": 0, "Ev Idx": 3303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195686.923, "dur": 12.880, + "args": { + "External id": 980713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195691.703, "dur": 6.908, + "args": { + "External id": 980714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195693.835, "dur": 4.612, + "args": { + "External id": 980715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195705.819, "dur": 5.727, + "args": { + "External id": 980716,"Record function id": 0, "Ev Idx": 3307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195707.410, "dur": 3.608, + "args": { + "External id": 980717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195708.346, "dur": 2.127, + "args": { + "External id": 980718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195709.234, "dur": 1.108, + "args": { + "External id": 980719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195715.958, "dur": 6.992, + "args": { + "External id": 980720,"Record function id": 0, "Ev Idx": 3311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195717.285, "dur": 5.136, + "args": { + "External id": 980721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195718.363, "dur": 3.577, + "args": { + "External id": 980722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195718.762, "dur": 3.096, + "args": { + "External id": 980723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195727.057, "dur": 5.013, + "args": { + "External id": 980724,"Record function id": 0, "Ev Idx": 3315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195728.449, "dur": 3.113, + "args": { + "External id": 980725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195729.227, "dur": 1.838, + "args": { + "External id": 980726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195729.975, "dur": 1.008, + "args": { + "External id": 980727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195735.938, "dur": 4.435, + "args": { + "External id": 980728,"Record function id": 0, "Ev Idx": 3319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195737.637, "dur": 2.240, + "args": { + "External id": 980729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195738.267, "dur": 1.119, + "args": { + "External id": 980730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195738.663, "dur": 0.635, + "args": { + "External id": 980731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195744.156, "dur": 4.531, + "args": { + "External id": 980732,"Record function id": 0, "Ev Idx": 3323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195745.417, "dur": 2.786, + "args": { + "External id": 980733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195746.298, "dur": 1.363, + "args": { + "External id": 980734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195746.872, "dur": 0.640, + "args": { + "External id": 980735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195752.536, "dur": 4.159, + "args": { + "External id": 980736,"Record function id": 0, "Ev Idx": 3327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195753.674, "dur": 2.493, + "args": { + "External id": 980737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195754.350, "dur": 1.315, + "args": { + "External id": 980738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195754.896, "dur": 0.641, + "args": { + "External id": 980739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195760.470, "dur": 4.323, + "args": { + "External id": 980740,"Record function id": 0, "Ev Idx": 3331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195761.703, "dur": 2.578, + "args": { + "External id": 980741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195762.480, "dur": 1.297, + "args": { + "External id": 980742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195762.817, "dur": 0.870, + "args": { + "External id": 980743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195768.605, "dur": 4.445, + "args": { + "External id": 980744,"Record function id": 0, "Ev Idx": 3335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938195769.803, "dur": 2.762, + "args": { + "External id": 980745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195770.329, "dur": 1.739, + "args": { + "External id": 980746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938195771.220, "dur": 0.742, + "args": { + "External id": 980747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938195777.752, "dur": 65704.950, + "args": { + "External id": 980748,"Record function id": 0, "Sequence number": 10552273, "Fwd thread id": 1, "Ev Idx": 3339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938195779.362, "dur": 65693.807, + "args": { + "External id": 980749,"Sequence number": 10552273, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3340 + } + }, + { + "ph": "f", "id": 193, "pid": 2338708, "tid": 2379421, "ts": 6345938195779.362, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2338708, "tid": 2379421, + "ts": 6345938195812.131, "dur": 44.928, + "args": { + "External id": 980750,"Record function id": 0, "Ev Idx": 3341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2338708, "tid": 2379421, + "ts": 6345938195866.249, "dur": 71.940, + "args": { + "External id": 980751,"Record function id": 0, "Ev Idx": 3342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2338708, "tid": 2379421, + "ts": 6345938195945.587, "dur": 65518.677, + "args": { + "External id": 980752,"Record function id": 0, "Ev Idx": 3343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938196115.708, "dur": 9.879, + "args": { + "External id": 980753,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938196138.602, "dur": 7.959, + "args": { + "External id": 980754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938196164.618, "dur": 64312.364, + "args": { + "External id": 980755,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938196180.718, "dur": 64281.566, + "args": { + "External id": 980756,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938196326.852, "dur": 19.957, + "args": { + "External id": 980757,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938196370.123, "dur": 64040.665, + "args": { + "External id": 980758,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938196373.434, "dur": 64036.204, + "args": { + "External id": 980759,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938196378.769, "dur": 10.611, + "args": { + "External id": 980760,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938196391.603, "dur": 64012.220, + "args": { + "External id": 980761,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938260598.456, "dur": 13.570, + "args": { + "External id": 980762,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938260602.885, "dur": 8.642, + "args": { + "External id": 980763,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938260645.650, "dur": 463.650, + "args": { + "External id": 980764,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938260676.342, "dur": 425.747, + "args": { + "External id": 980765,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3356, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938260689.039, "dur": 405.459, + "args": { + "External id": 980766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938261134.240, "dur": 3.104, + "args": { + "External id": 980767,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3358, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938261212.874, "dur": 8.135, + "args": { + "External id": 980768,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938261272.870, "dur": 1.843, + "args": { + "External id": 980769,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938261294.178, "dur": 3.504, + "args": { + "External id": 980770,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938261312.002, "dur": 1.407, + "args": { + "External id": 980771,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938261325.449, "dur": 1.306, + "args": { + "External id": 980772,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938261339.436, "dur": 1.134, + "args": { + "External id": 980773,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938261352.473, "dur": 3.065, + "args": { + "External id": 980774,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938261376.952, "dur": 2.732, + "args": { + "External id": 980775,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938261392.020, "dur": 1.053, + "args": { + "External id": 980776,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938261499.778, "dur": 3330.586, + "args": { + "External id": 980777,"Record function id": 0, "Ev Idx": 3368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2338708, "tid": 2379421, + "ts": 6345938261521.280, "dur": 1235.191, + "args": { + "External id": 980778,"Record function id": 0, "Ev Idx": 3369 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338708, "tid": 2379421, + "ts": 6345938261537.316, "dur": 364.373, + "args": { + "External id": 980779,"Record function id": 0, "Ev Idx": 3370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938261634.375, "dur": 5.190, + "args": { + "External id": 980780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938261643.036, "dur": 1.149, + "args": { + "External id": 980781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938261646.058, "dur": 3.309, + "args": { + "External id": 980782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938261651.511, "dur": 1.054, + "args": { + "External id": 980783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938261654.098, "dur": 0.962, + "args": { + "External id": 980784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938261656.790, "dur": 1.083, + "args": { + "External id": 980785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938261659.762, "dur": 2.290, + "args": { + "External id": 980786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938261663.812, "dur": 0.841, + "args": { + "External id": 980787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938261666.195, "dur": 0.727, + "args": { + "External id": 980788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938261668.574, "dur": 0.857, + "args": { + "External id": 980789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938261689.873, "dur": 177.502, + "args": { + "External id": 980790,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938261707.593, "dur": 153.769, + "args": { + "External id": 980791,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938261726.615, "dur": 18.550, + "args": { + "External id": 980792,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938261749.585, "dur": 78.014, + "args": { + "External id": 980793,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938261752.565, "dur": 74.545, + "args": { + "External id": 980794,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938261757.599, "dur": 6.225, + "args": { + "External id": 980795,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938261766.299, "dur": 60.070, + "args": { + "External id": 980796,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2338708, "tid": 2379421, + "ts": 6345938261996.498, "dur": 750.081, + "args": { + "External id": 980797,"Record function id": 0, "Ev Idx": 3388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338708, "tid": 2379421, + "ts": 6345938262051.499, "dur": 679.380, + "args": { + "External id": 980798,"Record function id": 0, "Ev Idx": 3389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938262166.402, "dur": 8.554, + "args": { + "External id": 980799,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938262193.713, "dur": 42.271, + "args": { + "External id": 980800,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262205.104, "dur": 2.235, + "args": { + "External id": 980801,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262209.899, "dur": 0.495, + "args": { + "External id": 980802,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262212.426, "dur": 0.763, + "args": { + "External id": 980803,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262214.863, "dur": 0.476, + "args": { + "External id": 980804,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262217.073, "dur": 0.461, + "args": { + "External id": 980805,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262219.454, "dur": 3.026, + "args": { + "External id": 980806,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262224.452, "dur": 0.523, + "args": { + "External id": 980807,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262226.803, "dur": 0.493, + "args": { + "External id": 980808,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262229.272, "dur": 0.402, + "args": { + "External id": 980809,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938262247.225, "dur": 57.733, + "args": { + "External id": 980810,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938262346.127, "dur": 128.434, + "args": { + "External id": 980811,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938262358.477, "dur": 4.574, + "args": { + "External id": 980812,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938262369.301, "dur": 11.626, + "args": { + "External id": 980813,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938262374.251, "dur": 6.198, + "args": { + "External id": 980814,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262378.400, "dur": 0.682, + "args": { + "External id": 980815,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938262388.786, "dur": 28.166, + "args": { + "External id": 980816,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262391.348, "dur": 0.473, + "args": { + "External id": 980817,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262393.401, "dur": 0.506, + "args": { + "External id": 980818,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262395.636, "dur": 2.810, + "args": { + "External id": 980819,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262400.382, "dur": 0.409, + "args": { + "External id": 980820,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262402.846, "dur": 0.388, + "args": { + "External id": 980821,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262405.325, "dur": 0.581, + "args": { + "External id": 980822,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262407.634, "dur": 0.510, + "args": { + "External id": 980823,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262409.879, "dur": 0.475, + "args": { + "External id": 980824,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938262412.257, "dur": 0.480, + "args": { + "External id": 980825,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938262431.494, "dur": 33.871, + "args": { + "External id": 980826,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938262528.659, "dur": 126.143, + "args": { + "External id": 980827,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938262553.847, "dur": 96.675, + "args": { + "External id": 980828,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3419, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938262564.872, "dur": 80.910, + "args": { + "External id": 980829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938262670.841, "dur": 2.301, + "args": { + "External id": 980830,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3421, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938262764.704, "dur": 2040.590, + "args": { + "External id": 980831,"Sequence number": 10552272, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3422 + } + }, + { + "ph": "f", "id": 194, "pid": 2338708, "tid": 2379421, "ts": 6345938262764.704, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938262890.104, "dur": 115.612, + "args": { + "External id": 980832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938263115.833, "dur": 47.255, + "args": { + "External id": 980833,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938263188.205, "dur": 67.272, + "args": { + "External id": 980834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938263270.399, "dur": 40.024, + "args": { + "External id": 980835,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938263319.017, "dur": 38.937, + "args": { + "External id": 980836,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938263366.672, "dur": 34.807, + "args": { + "External id": 980837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938263409.514, "dur": 35.149, + "args": { + "External id": 980838,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938263474.124, "dur": 27.739, + "args": { + "External id": 980839,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938263521.122, "dur": 30.292, + "args": { + "External id": 980840,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938263572.781, "dur": 22.248, + "args": { + "External id": 980841,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938263609.210, "dur": 16.739, + "args": { + "External id": 980842,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938263636.399, "dur": 43.368, + "args": { + "External id": 980843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938263684.319, "dur": 38.215, + "args": { + "External id": 980844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938263758.391, "dur": 361.528, + "args": { + "External id": 980845,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938263850.939, "dur": 6.246, + "args": { + "External id": 980846,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938263859.725, "dur": 2.785, + "args": { + "External id": 980847,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938263863.973, "dur": 2.178, + "args": { + "External id": 980848,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938263867.620, "dur": 2.056, + "args": { + "External id": 980849,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938263919.965, "dur": 5.711, + "args": { + "External id": 980850,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938263922.339, "dur": 3.094, + "args": { + "External id": 980851,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938263928.279, "dur": 37.731, + "args": { + "External id": 980852,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938263935.159, "dur": 4.200, + "args": { + "External id": 980853,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938263968.049, "dur": 1.880, + "args": { + "External id": 980854,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938263969.112, "dur": 0.674, + "args": { + "External id": 980855,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938263971.326, "dur": 18.023, + "args": { + "External id": 980856,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938263974.162, "dur": 0.597, + "args": { + "External id": 980857,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938264174.820, "dur": 43.458, + "args": { + "External id": 980858,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938264237.842, "dur": 19.891, + "args": { + "External id": 980859,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938264268.116, "dur": 63.020, + "args": { + "External id": 980860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938264342.865, "dur": 47.642, + "args": { + "External id": 980861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938264406.875, "dur": 29.130, + "args": { + "External id": 980862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938264443.647, "dur": 41.765, + "args": { + "External id": 980863,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938264494.556, "dur": 33.784, + "args": { + "External id": 980864,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938264536.818, "dur": 35.628, + "args": { + "External id": 980865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938264592.343, "dur": 28.501, + "args": { + "External id": 980866,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938264637.268, "dur": 29.378, + "args": { + "External id": 980867,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938264682.161, "dur": 20.594, + "args": { + "External id": 980868,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938264721.233, "dur": 16.747, + "args": { + "External id": 980869,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938264752.549, "dur": 18.361, + "args": { + "External id": 980870,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264854.875, "dur": 18.440, + "args": { + "External id": 980871,"Record function id": 0, "Ev Idx": 3462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264858.730, "dur": 13.531, + "args": { + "External id": 980872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264863.803, "dur": 7.260, + "args": { + "External id": 980873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264866.316, "dur": 4.609, + "args": { + "External id": 980874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264877.750, "dur": 5.976, + "args": { + "External id": 980875,"Record function id": 0, "Ev Idx": 3466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264879.283, "dur": 3.913, + "args": { + "External id": 980876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264880.010, "dur": 2.534, + "args": { + "External id": 980877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264881.148, "dur": 1.206, + "args": { + "External id": 980878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264887.621, "dur": 7.206, + "args": { + "External id": 980879,"Record function id": 0, "Ev Idx": 3470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264888.997, "dur": 5.322, + "args": { + "External id": 980880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264889.646, "dur": 4.137, + "args": { + "External id": 980881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264890.031, "dur": 3.636, + "args": { + "External id": 980882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264898.580, "dur": 5.169, + "args": { + "External id": 980883,"Record function id": 0, "Ev Idx": 3474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264900.166, "dur": 3.080, + "args": { + "External id": 980884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264900.805, "dur": 1.933, + "args": { + "External id": 980885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264901.775, "dur": 0.862, + "args": { + "External id": 980886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264907.675, "dur": 4.733, + "args": { + "External id": 980887,"Record function id": 0, "Ev Idx": 3478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264908.966, "dur": 2.960, + "args": { + "External id": 980888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264909.527, "dur": 1.840, + "args": { + "External id": 980889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264910.338, "dur": 0.953, + "args": { + "External id": 980890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264916.098, "dur": 4.372, + "args": { + "External id": 980891,"Record function id": 0, "Ev Idx": 3482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264917.311, "dur": 2.655, + "args": { + "External id": 980892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264917.902, "dur": 1.578, + "args": { + "External id": 980893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264918.458, "dur": 0.898, + "args": { + "External id": 980894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264924.255, "dur": 4.185, + "args": { + "External id": 980895,"Record function id": 0, "Ev Idx": 3486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264925.558, "dur": 2.391, + "args": { + "External id": 980896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264926.289, "dur": 1.166, + "args": { + "External id": 980897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264926.637, "dur": 0.707, + "args": { + "External id": 980898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264932.114, "dur": 4.456, + "args": { + "External id": 980899,"Record function id": 0, "Ev Idx": 3490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264933.321, "dur": 2.777, + "args": { + "External id": 980900,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264934.010, "dur": 1.602, + "args": { + "External id": 980901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264934.747, "dur": 0.775, + "args": { + "External id": 980902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264940.390, "dur": 4.703, + "args": { + "External id": 980903,"Record function id": 0, "Ev Idx": 3494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938264941.611, "dur": 2.983, + "args": { + "External id": 980904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264942.429, "dur": 1.641, + "args": { + "External id": 980905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938264943.186, "dur": 0.773, + "args": { + "External id": 980906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938264949.820, "dur": 64045.966, + "args": { + "External id": 980907,"Record function id": 0, "Sequence number": 10552271, "Fwd thread id": 1, "Ev Idx": 3498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938264951.357, "dur": 64034.586, + "args": { + "External id": 980908,"Sequence number": 10552271, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3499 + } + }, + { + "ph": "f", "id": 195, "pid": 2338708, "tid": 2379421, "ts": 6345938264951.357, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2338708, "tid": 2379421, + "ts": 6345938264983.981, "dur": 63.120, + "args": { + "External id": 980909,"Record function id": 0, "Ev Idx": 3500 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2338708, "tid": 2379421, + "ts": 6345938265099.590, "dur": 78.788, + "args": { + "External id": 980910,"Record function id": 0, "Ev Idx": 3501 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2338708, "tid": 2379421, + "ts": 6345938265186.314, "dur": 63791.037, + "args": { + "External id": 980911,"Record function id": 0, "Ev Idx": 3502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938265292.778, "dur": 7.924, + "args": { + "External id": 980912,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938265313.091, "dur": 7.847, + "args": { + "External id": 980913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938265339.582, "dur": 62636.493, + "args": { + "External id": 980914,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938265355.776, "dur": 62606.559, + "args": { + "External id": 980915,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938265492.753, "dur": 19.841, + "args": { + "External id": 980916,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938265535.301, "dur": 62374.991, + "args": { + "External id": 980917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938265538.470, "dur": 62370.776, + "args": { + "External id": 980918,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938265543.987, "dur": 10.843, + "args": { + "External id": 980919,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938265557.268, "dur": 62346.253, + "args": { + "External id": 980920,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938328144.011, "dur": 13.857, + "args": { + "External id": 980921,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938328148.721, "dur": 8.519, + "args": { + "External id": 980922,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938328193.706, "dur": 445.345, + "args": { + "External id": 980923,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938328224.180, "dur": 408.491, + "args": { + "External id": 980924,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3515, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938328237.076, "dur": 389.193, + "args": { + "External id": 980925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938328660.658, "dur": 2.584, + "args": { + "External id": 980926,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3517, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938328734.300, "dur": 7.615, + "args": { + "External id": 980927,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938328796.207, "dur": 1.757, + "args": { + "External id": 980928,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938328817.823, "dur": 3.602, + "args": { + "External id": 980929,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938328834.317, "dur": 1.632, + "args": { + "External id": 980930,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938328849.980, "dur": 0.997, + "args": { + "External id": 980931,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938328864.045, "dur": 1.186, + "args": { + "External id": 980932,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938328877.391, "dur": 3.330, + "args": { + "External id": 980933,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938328893.413, "dur": 2.572, + "args": { + "External id": 980934,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938328907.790, "dur": 1.031, + "args": { + "External id": 980935,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938329033.283, "dur": 3358.767, + "args": { + "External id": 980936,"Record function id": 0, "Ev Idx": 3527 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2338708, "tid": 2379421, + "ts": 6345938329095.393, "dur": 1195.520, + "args": { + "External id": 980937,"Record function id": 0, "Ev Idx": 3528 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338708, "tid": 2379421, + "ts": 6345938329113.375, "dur": 378.306, + "args": { + "External id": 980938,"Record function id": 0, "Ev Idx": 3529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938329214.043, "dur": 5.613, + "args": { + "External id": 980939,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938329223.040, "dur": 1.096, + "args": { + "External id": 980940,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938329225.939, "dur": 3.031, + "args": { + "External id": 980941,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938329230.724, "dur": 1.113, + "args": { + "External id": 980942,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938329233.410, "dur": 1.102, + "args": { + "External id": 980943,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938329236.278, "dur": 0.988, + "args": { + "External id": 980944,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938329239.200, "dur": 2.199, + "args": { + "External id": 980945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938329242.995, "dur": 0.852, + "args": { + "External id": 980946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938329245.456, "dur": 0.702, + "args": { + "External id": 980947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938329247.819, "dur": 0.746, + "args": { + "External id": 980948,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938329270.380, "dur": 183.954, + "args": { + "External id": 980949,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938329288.670, "dur": 160.286, + "args": { + "External id": 980950,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938329315.553, "dur": 19.335, + "args": { + "External id": 980951,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938329339.430, "dur": 76.341, + "args": { + "External id": 980952,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938329342.646, "dur": 72.710, + "args": { + "External id": 980953,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329347.498, "dur": 6.320, + "args": { + "External id": 980954,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938329355.893, "dur": 58.806, + "args": { + "External id": 980955,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2338708, "tid": 2379421, + "ts": 6345938329581.782, "dur": 698.898, + "args": { + "External id": 980956,"Record function id": 0, "Ev Idx": 3547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338708, "tid": 2379421, + "ts": 6345938329601.212, "dur": 664.497, + "args": { + "External id": 980957,"Record function id": 0, "Ev Idx": 3548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938329662.795, "dur": 5.155, + "args": { + "External id": 980958,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938329685.612, "dur": 39.491, + "args": { + "External id": 980959,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329691.460, "dur": 1.808, + "args": { + "External id": 980960,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329695.657, "dur": 0.531, + "args": { + "External id": 980961,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329698.000, "dur": 0.727, + "args": { + "External id": 980962,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329700.347, "dur": 0.386, + "args": { + "External id": 980963,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329702.587, "dur": 0.358, + "args": { + "External id": 980964,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329705.102, "dur": 2.462, + "args": { + "External id": 980965,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329709.540, "dur": 0.380, + "args": { + "External id": 980966,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329717.425, "dur": 0.523, + "args": { + "External id": 980967,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329719.849, "dur": 0.435, + "args": { + "External id": 980968,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938329738.956, "dur": 48.424, + "args": { + "External id": 980969,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938329822.027, "dur": 124.507, + "args": { + "External id": 980970,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938329836.513, "dur": 3.668, + "args": { + "External id": 980971,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938329846.871, "dur": 11.859, + "args": { + "External id": 980972,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938329851.901, "dur": 6.364, + "args": { + "External id": 980973,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329856.174, "dur": 0.827, + "args": { + "External id": 980974,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938329866.085, "dur": 27.037, + "args": { + "External id": 980975,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329868.441, "dur": 0.613, + "args": { + "External id": 980976,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329871.020, "dur": 0.403, + "args": { + "External id": 980977,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329873.108, "dur": 2.751, + "args": { + "External id": 980978,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329877.573, "dur": 0.473, + "args": { + "External id": 980979,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329879.857, "dur": 0.390, + "args": { + "External id": 980980,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329881.911, "dur": 0.630, + "args": { + "External id": 980981,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329884.039, "dur": 0.445, + "args": { + "External id": 980982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329886.171, "dur": 0.652, + "args": { + "External id": 980983,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938329888.081, "dur": 0.640, + "args": { + "External id": 980984,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938329904.492, "dur": 32.955, + "args": { + "External id": 980985,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938329997.419, "dur": 186.033, + "args": { + "External id": 980986,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938330039.418, "dur": 139.310, + "args": { + "External id": 980987,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3578, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938330051.430, "dur": 121.972, + "args": { + "External id": 980988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938330200.788, "dur": 2.247, + "args": { + "External id": 980989,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3580, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938330298.756, "dur": 2070.397, + "args": { + "External id": 980990,"Sequence number": 10552270, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3581 + } + }, + { + "ph": "f", "id": 196, "pid": 2338708, "tid": 2379421, "ts": 6345938330298.756, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938330427.661, "dur": 116.235, + "args": { + "External id": 980991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938330588.031, "dur": 45.599, + "args": { + "External id": 980992,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938330692.986, "dur": 63.193, + "args": { + "External id": 980993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938330771.730, "dur": 40.642, + "args": { + "External id": 980994,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938330821.204, "dur": 39.761, + "args": { + "External id": 980995,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938330869.577, "dur": 35.792, + "args": { + "External id": 980996,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938330913.600, "dur": 36.510, + "args": { + "External id": 980997,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938330979.577, "dur": 27.144, + "args": { + "External id": 980998,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938331051.598, "dur": 75.613, + "args": { + "External id": 980999,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938331154.987, "dur": 22.781, + "args": { + "External id": 981000,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938331192.757, "dur": 18.803, + "args": { + "External id": 981001,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938331223.015, "dur": 52.717, + "args": { + "External id": 981002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938331280.458, "dur": 39.548, + "args": { + "External id": 981003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938331356.325, "dur": 308.868, + "args": { + "External id": 981004,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938331448.761, "dur": 6.591, + "args": { + "External id": 981005,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938331457.750, "dur": 2.651, + "args": { + "External id": 981006,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938331461.819, "dur": 2.140, + "args": { + "External id": 981007,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938331465.056, "dur": 1.815, + "args": { + "External id": 981008,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938331517.975, "dur": 5.581, + "args": { + "External id": 981009,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938331520.288, "dur": 3.029, + "args": { + "External id": 981010,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938331525.836, "dur": 41.376, + "args": { + "External id": 981011,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938331532.539, "dur": 4.493, + "args": { + "External id": 981012,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938331569.228, "dur": 1.970, + "args": { + "External id": 981013,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938331570.350, "dur": 0.732, + "args": { + "External id": 981014,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938331572.725, "dur": 33.610, + "args": { + "External id": 981015,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938331574.954, "dur": 0.727, + "args": { + "External id": 981016,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938331705.856, "dur": 35.747, + "args": { + "External id": 981017,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938331759.308, "dur": 18.839, + "args": { + "External id": 981018,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938331791.361, "dur": 46.459, + "args": { + "External id": 981019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938331846.446, "dur": 44.307, + "args": { + "External id": 981020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938331903.528, "dur": 26.399, + "args": { + "External id": 981021,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938331937.888, "dur": 38.170, + "args": { + "External id": 981022,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938331985.254, "dur": 55.458, + "args": { + "External id": 981023,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938332090.674, "dur": 44.528, + "args": { + "External id": 981024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938332160.014, "dur": 28.714, + "args": { + "External id": 981025,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938332206.675, "dur": 26.682, + "args": { + "External id": 981026,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938332249.317, "dur": 18.434, + "args": { + "External id": 981027,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938332286.522, "dur": 16.517, + "args": { + "External id": 981028,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938332316.948, "dur": 17.094, + "args": { + "External id": 981029,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332417.862, "dur": 19.003, + "args": { + "External id": 981030,"Record function id": 0, "Ev Idx": 3621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332422.147, "dur": 13.630, + "args": { + "External id": 981031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332427.192, "dur": 7.412, + "args": { + "External id": 981032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332429.773, "dur": 4.650, + "args": { + "External id": 981033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332441.528, "dur": 6.529, + "args": { + "External id": 981034,"Record function id": 0, "Ev Idx": 3625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332443.456, "dur": 4.041, + "args": { + "External id": 981035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332444.522, "dur": 2.415, + "args": { + "External id": 981036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332445.407, "dur": 1.384, + "args": { + "External id": 981037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332452.005, "dur": 7.704, + "args": { + "External id": 981038,"Record function id": 0, "Ev Idx": 3629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332453.630, "dur": 5.605, + "args": { + "External id": 981039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332454.382, "dur": 4.309, + "args": { + "External id": 981040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332455.137, "dur": 3.450, + "args": { + "External id": 981041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332463.513, "dur": 4.364, + "args": { + "External id": 981042,"Record function id": 0, "Ev Idx": 3633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332464.800, "dur": 2.577, + "args": { + "External id": 981043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332465.465, "dur": 1.397, + "args": { + "External id": 981044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332465.837, "dur": 0.946, + "args": { + "External id": 981045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332471.593, "dur": 4.269, + "args": { + "External id": 981046,"Record function id": 0, "Ev Idx": 3637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332472.781, "dur": 2.610, + "args": { + "External id": 981047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332473.456, "dur": 1.469, + "args": { + "External id": 981048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332473.862, "dur": 0.974, + "args": { + "External id": 981049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332479.927, "dur": 5.715, + "args": { + "External id": 981050,"Record function id": 0, "Ev Idx": 3641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332481.496, "dur": 3.622, + "args": { + "External id": 981051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332482.589, "dur": 2.018, + "args": { + "External id": 981052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332483.369, "dur": 1.102, + "args": { + "External id": 981053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332489.844, "dur": 4.200, + "args": { + "External id": 981054,"Record function id": 0, "Ev Idx": 3645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332491.214, "dur": 2.368, + "args": { + "External id": 981055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332491.967, "dur": 1.122, + "args": { + "External id": 981056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332492.288, "dur": 0.694, + "args": { + "External id": 981057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332497.845, "dur": 4.542, + "args": { + "External id": 981058,"Record function id": 0, "Ev Idx": 3649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332499.222, "dur": 2.697, + "args": { + "External id": 981059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332499.967, "dur": 1.462, + "args": { + "External id": 981060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332500.498, "dur": 0.842, + "args": { + "External id": 981061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332506.400, "dur": 4.892, + "args": { + "External id": 981062,"Record function id": 0, "Ev Idx": 3653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938332508.027, "dur": 2.802, + "args": { + "External id": 981063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332508.804, "dur": 1.558, + "args": { + "External id": 981064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938332509.525, "dur": 0.724, + "args": { + "External id": 981065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938332515.855, "dur": 66473.729, + "args": { + "External id": 981066,"Record function id": 0, "Sequence number": 10552269, "Fwd thread id": 1, "Ev Idx": 3657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938332517.506, "dur": 66462.290, + "args": { + "External id": 981067,"Sequence number": 10552269, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3658 + } + }, + { + "ph": "f", "id": 197, "pid": 2338708, "tid": 2379421, "ts": 6345938332517.506, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2338708, "tid": 2379421, + "ts": 6345938332551.076, "dur": 44.944, + "args": { + "External id": 981068,"Record function id": 0, "Ev Idx": 3659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2338708, "tid": 2379421, + "ts": 6345938332604.902, "dur": 72.094, + "args": { + "External id": 981069,"Record function id": 0, "Ev Idx": 3660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2338708, "tid": 2379421, + "ts": 6345938332684.137, "dur": 66286.463, + "args": { + "External id": 981070,"Record function id": 0, "Ev Idx": 3661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938332787.395, "dur": 7.352, + "args": { + "External id": 981071,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938332805.527, "dur": 7.635, + "args": { + "External id": 981072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938332830.562, "dur": 65310.553, + "args": { + "External id": 981073,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938332846.387, "dur": 65279.669, + "args": { + "External id": 981074,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938332968.737, "dur": 22.581, + "args": { + "External id": 981075,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938333037.357, "dur": 65006.871, + "args": { + "External id": 981076,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938333044.297, "dur": 64998.651, + "args": { + "External id": 981077,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938333050.506, "dur": 56.504, + "args": { + "External id": 981078,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938333110.852, "dur": 64926.342, + "args": { + "External id": 981079,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938398263.543, "dur": 14.034, + "args": { + "External id": 981080,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938398268.423, "dur": 8.660, + "args": { + "External id": 981081,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938398312.293, "dur": 328.640, + "args": { + "External id": 981082,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938398343.286, "dur": 292.127, + "args": { + "External id": 981083,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3674, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938398356.133, "dur": 273.123, + "args": { + "External id": 981084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938398662.563, "dur": 2.343, + "args": { + "External id": 981085,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3676, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938398731.425, "dur": 7.491, + "args": { + "External id": 981086,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938398789.633, "dur": 1.644, + "args": { + "External id": 981087,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938398811.379, "dur": 3.866, + "args": { + "External id": 981088,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938398828.352, "dur": 1.272, + "args": { + "External id": 981089,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938398842.207, "dur": 1.128, + "args": { + "External id": 981090,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938398854.785, "dur": 1.127, + "args": { + "External id": 981091,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938398868.435, "dur": 3.177, + "args": { + "External id": 981092,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938398884.979, "dur": 3.307, + "args": { + "External id": 981093,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938398899.950, "dur": 1.195, + "args": { + "External id": 981094,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938399006.093, "dur": 3403.063, + "args": { + "External id": 981095,"Record function id": 0, "Ev Idx": 3686 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2338708, "tid": 2379421, + "ts": 6345938399088.032, "dur": 1237.828, + "args": { + "External id": 981096,"Record function id": 0, "Ev Idx": 3687 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338708, "tid": 2379421, + "ts": 6345938399111.390, "dur": 393.270, + "args": { + "External id": 981097,"Record function id": 0, "Ev Idx": 3688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938399215.430, "dur": 6.059, + "args": { + "External id": 981098,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938399225.460, "dur": 1.433, + "args": { + "External id": 981099,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938399229.128, "dur": 3.454, + "args": { + "External id": 981100,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938399234.302, "dur": 1.358, + "args": { + "External id": 981101,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938399237.255, "dur": 1.078, + "args": { + "External id": 981102,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938399240.006, "dur": 1.129, + "args": { + "External id": 981103,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938399242.642, "dur": 2.307, + "args": { + "External id": 981104,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938399246.633, "dur": 0.899, + "args": { + "External id": 981105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938399249.066, "dur": 0.930, + "args": { + "External id": 981106,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938399251.516, "dur": 0.923, + "args": { + "External id": 981107,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938399272.803, "dur": 192.737, + "args": { + "External id": 981108,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938399309.410, "dur": 150.712, + "args": { + "External id": 981109,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938399327.488, "dur": 17.866, + "args": { + "External id": 981110,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938399349.800, "dur": 76.121, + "args": { + "External id": 981111,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938399352.948, "dur": 72.576, + "args": { + "External id": 981112,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399357.656, "dur": 6.091, + "args": { + "External id": 981113,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938399366.087, "dur": 58.636, + "args": { + "External id": 981114,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2338708, "tid": 2379421, + "ts": 6345938399597.827, "dur": 718.134, + "args": { + "External id": 981115,"Record function id": 0, "Ev Idx": 3706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338708, "tid": 2379421, + "ts": 6345938399617.248, "dur": 683.017, + "args": { + "External id": 981116,"Record function id": 0, "Ev Idx": 3707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938399682.021, "dur": 6.261, + "args": { + "External id": 981117,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938399705.766, "dur": 32.745, + "args": { + "External id": 981118,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399711.448, "dur": 2.061, + "args": { + "External id": 981119,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399715.905, "dur": 0.629, + "args": { + "External id": 981120,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399718.328, "dur": 0.560, + "args": { + "External id": 981121,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399720.614, "dur": 0.534, + "args": { + "External id": 981122,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399722.822, "dur": 0.513, + "args": { + "External id": 981123,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399725.155, "dur": 2.579, + "args": { + "External id": 981124,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399729.396, "dur": 0.403, + "args": { + "External id": 981125,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399731.508, "dur": 0.432, + "args": { + "External id": 981126,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399733.513, "dur": 0.595, + "args": { + "External id": 981127,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938399749.351, "dur": 51.563, + "args": { + "External id": 981128,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938399842.144, "dur": 128.121, + "args": { + "External id": 981129,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938399853.528, "dur": 3.973, + "args": { + "External id": 981130,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938399863.568, "dur": 12.496, + "args": { + "External id": 981131,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938399868.887, "dur": 6.657, + "args": { + "External id": 981132,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399873.388, "dur": 0.702, + "args": { + "External id": 981133,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938399883.793, "dur": 27.821, + "args": { + "External id": 981134,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399887.004, "dur": 0.608, + "args": { + "External id": 981135,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399889.243, "dur": 0.530, + "args": { + "External id": 981136,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399891.695, "dur": 2.589, + "args": { + "External id": 981137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399895.778, "dur": 0.523, + "args": { + "External id": 981138,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399898.227, "dur": 0.732, + "args": { + "External id": 981139,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399900.670, "dur": 0.392, + "args": { + "External id": 981140,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399902.580, "dur": 0.521, + "args": { + "External id": 981141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399904.696, "dur": 0.412, + "args": { + "External id": 981142,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938399907.025, "dur": 0.388, + "args": { + "External id": 981143,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938399924.276, "dur": 37.031, + "args": { + "External id": 981144,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938400042.313, "dur": 171.766, + "args": { + "External id": 981145,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938400108.140, "dur": 101.504, + "args": { + "External id": 981146,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3737, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938400120.302, "dur": 84.536, + "args": { + "External id": 981147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938400232.166, "dur": 2.114, + "args": { + "External id": 981148,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3739, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938400334.889, "dur": 2049.405, + "args": { + "External id": 981149,"Sequence number": 10552268, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3740 + } + }, + { + "ph": "f", "id": 198, "pid": 2338708, "tid": 2379421, "ts": 6345938400334.889, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938400461.306, "dur": 116.373, + "args": { + "External id": 981150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938400619.803, "dur": 44.305, + "args": { + "External id": 981151,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938400684.443, "dur": 56.890, + "args": { + "External id": 981152,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938400755.318, "dur": 37.604, + "args": { + "External id": 981153,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938400801.216, "dur": 39.698, + "args": { + "External id": 981154,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938400849.636, "dur": 35.023, + "args": { + "External id": 981155,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938400892.601, "dur": 35.436, + "args": { + "External id": 981156,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938400957.118, "dur": 26.431, + "args": { + "External id": 981157,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938401005.612, "dur": 96.009, + "args": { + "External id": 981158,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938401130.865, "dur": 26.659, + "args": { + "External id": 981159,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938401171.978, "dur": 20.010, + "args": { + "External id": 981160,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938401204.002, "dur": 51.256, + "args": { + "External id": 981161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938401260.018, "dur": 38.485, + "args": { + "External id": 981162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938401346.206, "dur": 291.465, + "args": { + "External id": 981163,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938401440.645, "dur": 6.885, + "args": { + "External id": 981164,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938401449.949, "dur": 2.937, + "args": { + "External id": 981165,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938401454.672, "dur": 2.460, + "args": { + "External id": 981166,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938401458.257, "dur": 1.923, + "args": { + "External id": 981167,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938401512.467, "dur": 5.498, + "args": { + "External id": 981168,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938401514.556, "dur": 3.178, + "args": { + "External id": 981169,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938401520.442, "dur": 39.849, + "args": { + "External id": 981170,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938401526.813, "dur": 3.778, + "args": { + "External id": 981171,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938401562.737, "dur": 2.170, + "args": { + "External id": 981172,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938401563.971, "dur": 0.829, + "args": { + "External id": 981173,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938401566.410, "dur": 16.973, + "args": { + "External id": 981174,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938401568.201, "dur": 0.690, + "args": { + "External id": 981175,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938401693.739, "dur": 38.911, + "args": { + "External id": 981176,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938401755.365, "dur": 18.304, + "args": { + "External id": 981177,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938401784.940, "dur": 52.725, + "args": { + "External id": 981178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938401845.932, "dur": 50.074, + "args": { + "External id": 981179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938401908.849, "dur": 26.009, + "args": { + "External id": 981180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938401941.990, "dur": 36.898, + "args": { + "External id": 981181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938401987.841, "dur": 58.174, + "args": { + "External id": 981182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938402097.454, "dur": 45.233, + "args": { + "External id": 981183,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938402166.449, "dur": 31.086, + "args": { + "External id": 981184,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938402216.310, "dur": 29.919, + "args": { + "External id": 981185,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938402262.592, "dur": 20.392, + "args": { + "External id": 981186,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938402301.776, "dur": 15.699, + "args": { + "External id": 981187,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938402331.059, "dur": 17.559, + "args": { + "External id": 981188,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402435.289, "dur": 17.988, + "args": { + "External id": 981189,"Record function id": 0, "Ev Idx": 3780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402438.949, "dur": 13.187, + "args": { + "External id": 981190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402443.605, "dur": 7.356, + "args": { + "External id": 981191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402446.054, "dur": 4.759, + "args": { + "External id": 981192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402458.209, "dur": 5.660, + "args": { + "External id": 981193,"Record function id": 0, "Ev Idx": 3784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402459.597, "dur": 3.708, + "args": { + "External id": 981194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402460.350, "dur": 2.375, + "args": { + "External id": 981195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402461.387, "dur": 1.198, + "args": { + "External id": 981196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402468.090, "dur": 8.655, + "args": { + "External id": 981197,"Record function id": 0, "Ev Idx": 3788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402469.511, "dur": 6.744, + "args": { + "External id": 981198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402470.592, "dur": 5.139, + "args": { + "External id": 981199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402471.349, "dur": 4.265, + "args": { + "External id": 981200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402480.608, "dur": 4.860, + "args": { + "External id": 981201,"Record function id": 0, "Ev Idx": 3792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402481.976, "dur": 2.967, + "args": { + "External id": 981202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402482.775, "dur": 1.664, + "args": { + "External id": 981203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402483.566, "dur": 0.780, + "args": { + "External id": 981204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402489.349, "dur": 4.690, + "args": { + "External id": 981205,"Record function id": 0, "Ev Idx": 3796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402490.656, "dur": 2.890, + "args": { + "External id": 981206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402491.580, "dur": 1.448, + "args": { + "External id": 981207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402492.020, "dur": 0.920, + "args": { + "External id": 981208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402497.968, "dur": 4.233, + "args": { + "External id": 981209,"Record function id": 0, "Ev Idx": 3800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402499.122, "dur": 2.573, + "args": { + "External id": 981210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402499.675, "dur": 1.493, + "args": { + "External id": 981211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402500.354, "dur": 0.683, + "args": { + "External id": 981212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402506.080, "dur": 4.127, + "args": { + "External id": 981213,"Record function id": 0, "Ev Idx": 3804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402507.363, "dur": 2.350, + "args": { + "External id": 981214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402508.006, "dur": 1.230, + "args": { + "External id": 981215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402508.344, "dur": 0.750, + "args": { + "External id": 981216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402514.036, "dur": 4.284, + "args": { + "External id": 981217,"Record function id": 0, "Ev Idx": 3808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402515.144, "dur": 2.691, + "args": { + "External id": 981218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402515.741, "dur": 1.577, + "args": { + "External id": 981219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402516.468, "dur": 0.760, + "args": { + "External id": 981220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402522.103, "dur": 4.183, + "args": { + "External id": 981221,"Record function id": 0, "Ev Idx": 3812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938402523.199, "dur": 2.611, + "args": { + "External id": 981222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402523.711, "dur": 1.572, + "args": { + "External id": 981223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938402524.381, "dur": 0.787, + "args": { + "External id": 981224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938402530.700, "dur": 66220.871, + "args": { + "External id": 981225,"Record function id": 0, "Sequence number": 10552267, "Fwd thread id": 1, "Ev Idx": 3816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938402531.997, "dur": 66209.872, + "args": { + "External id": 981226,"Sequence number": 10552267, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3817 + } + }, + { + "ph": "f", "id": 199, "pid": 2338708, "tid": 2379421, "ts": 6345938402531.997, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2338708, "tid": 2379421, + "ts": 6345938402567.116, "dur": 43.082, + "args": { + "External id": 981227,"Record function id": 0, "Ev Idx": 3818 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2338708, "tid": 2379421, + "ts": 6345938402619.641, "dur": 71.802, + "args": { + "External id": 981228,"Record function id": 0, "Ev Idx": 3819 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2338708, "tid": 2379421, + "ts": 6345938402697.998, "dur": 66034.852, + "args": { + "External id": 981229,"Record function id": 0, "Ev Idx": 3820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938402801.266, "dur": 7.784, + "args": { + "External id": 981230,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938402820.284, "dur": 7.280, + "args": { + "External id": 981231,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938402843.484, "dur": 64878.613, + "args": { + "External id": 981232,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938402858.465, "dur": 64849.135, + "args": { + "External id": 981233,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938402972.004, "dur": 19.090, + "args": { + "External id": 981234,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938403042.539, "dur": 64616.600, + "args": { + "External id": 981235,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938403046.542, "dur": 64611.602, + "args": { + "External id": 981236,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938403095.711, "dur": 15.232, + "args": { + "External id": 981237,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938403114.430, "dur": 64538.307, + "args": { + "External id": 981238,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938467841.600, "dur": 13.905, + "args": { + "External id": 981239,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938467846.038, "dur": 8.999, + "args": { + "External id": 981240,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938467890.287, "dur": 504.639, + "args": { + "External id": 981241,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938467920.803, "dur": 467.228, + "args": { + "External id": 981242,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3833, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938467933.696, "dur": 445.748, + "args": { + "External id": 981243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938468421.161, "dur": 2.793, + "args": { + "External id": 981244,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3835, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938468501.284, "dur": 8.185, + "args": { + "External id": 981245,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938468560.805, "dur": 1.901, + "args": { + "External id": 981246,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938468581.034, "dur": 3.814, + "args": { + "External id": 981247,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938468597.861, "dur": 1.111, + "args": { + "External id": 981248,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938468610.898, "dur": 0.820, + "args": { + "External id": 981249,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938468622.560, "dur": 0.884, + "args": { + "External id": 981250,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938468634.212, "dur": 3.303, + "args": { + "External id": 981251,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938468649.039, "dur": 2.152, + "args": { + "External id": 981252,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938468662.942, "dur": 0.814, + "args": { + "External id": 981253,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938468768.101, "dur": 3233.865, + "args": { + "External id": 981254,"Record function id": 0, "Ev Idx": 3845 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2338708, "tid": 2379421, + "ts": 6345938468789.591, "dur": 1198.012, + "args": { + "External id": 981255,"Record function id": 0, "Ev Idx": 3846 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338708, "tid": 2379421, + "ts": 6345938468806.057, "dur": 436.428, + "args": { + "External id": 981256,"Record function id": 0, "Ev Idx": 3847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938468900.276, "dur": 5.465, + "args": { + "External id": 981257,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938468909.654, "dur": 0.968, + "args": { + "External id": 981258,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938468912.787, "dur": 3.510, + "args": { + "External id": 981259,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938468918.696, "dur": 1.076, + "args": { + "External id": 981260,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938468921.554, "dur": 0.647, + "args": { + "External id": 981261,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938468923.846, "dur": 0.843, + "args": { + "External id": 981262,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 3853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938468926.594, "dur": 2.362, + "args": { + "External id": 981263,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 3854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938468930.803, "dur": 0.743, + "args": { + "External id": 981264,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938468935.975, "dur": 0.593, + "args": { + "External id": 981265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938468938.139, "dur": 0.829, + "args": { + "External id": 981266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 3857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938468959.462, "dur": 240.956, + "args": { + "External id": 981267,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938468977.756, "dur": 216.038, + "args": { + "External id": 981268,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 3859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938468997.335, "dur": 39.007, + "args": { + "External id": 981269,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938469041.463, "dur": 118.876, + "args": { + "External id": 981270,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 3861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938469047.121, "dur": 112.740, + "args": { + "External id": 981271,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 3862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469051.961, "dur": 44.751, + "args": { + "External id": 981272,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938469100.134, "dur": 58.886, + "args": { + "External id": 981273,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 3864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2338708, "tid": 2379421, + "ts": 6345938469335.630, "dur": 642.866, + "args": { + "External id": 981274,"Record function id": 0, "Ev Idx": 3865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338708, "tid": 2379421, + "ts": 6345938469355.143, "dur": 608.774, + "args": { + "External id": 981275,"Record function id": 0, "Ev Idx": 3866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938469422.332, "dur": 7.152, + "args": { + "External id": 981276,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938469447.052, "dur": 33.500, + "args": { + "External id": 981277,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469452.418, "dur": 3.021, + "args": { + "External id": 981278,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469457.130, "dur": 0.389, + "args": { + "External id": 981279,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469458.475, "dur": 0.520, + "args": { + "External id": 981280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469461.412, "dur": 0.369, + "args": { + "External id": 981281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469463.346, "dur": 0.795, + "args": { + "External id": 981282,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469465.507, "dur": 2.958, + "args": { + "External id": 981283,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469470.152, "dur": 0.844, + "args": { + "External id": 981284,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469472.452, "dur": 0.604, + "args": { + "External id": 981285,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469474.807, "dur": 0.554, + "args": { + "External id": 981286,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938469492.728, "dur": 49.440, + "args": { + "External id": 981287,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938469576.296, "dur": 125.079, + "args": { + "External id": 981288,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 3879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938469588.311, "dur": 4.585, + "args": { + "External id": 981289,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938469598.632, "dur": 11.703, + "args": { + "External id": 981290,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938469603.279, "dur": 6.580, + "args": { + "External id": 981291,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 3882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469607.704, "dur": 0.748, + "args": { + "External id": 981292,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 3883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938469618.117, "dur": 30.451, + "args": { + "External id": 981293,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 3884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469620.566, "dur": 0.444, + "args": { + "External id": 981294,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469624.028, "dur": 0.505, + "args": { + "External id": 981295,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469629.420, "dur": 2.516, + "args": { + "External id": 981296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469633.598, "dur": 0.391, + "args": { + "External id": 981297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469635.409, "dur": 0.467, + "args": { + "External id": 981298,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469637.797, "dur": 0.353, + "args": { + "External id": 981299,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469639.886, "dur": 0.356, + "args": { + "External id": 981300,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469642.004, "dur": 0.573, + "args": { + "External id": 981301,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938469644.036, "dur": 0.327, + "args": { + "External id": 981302,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938469659.655, "dur": 33.693, + "args": { + "External id": 981303,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 3894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938469750.952, "dur": 134.792, + "args": { + "External id": 981304,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 3895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938469786.161, "dur": 95.622, + "args": { + "External id": 981305,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3896, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938469796.413, "dur": 80.409, + "args": { + "External id": 981306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 3897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938469906.053, "dur": 2.306, + "args": { + "External id": 981307,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3898, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938469995.591, "dur": 1984.442, + "args": { + "External id": 981308,"Sequence number": 10552266, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3899 + } + }, + { + "ph": "f", "id": 200, "pid": 2338708, "tid": 2379421, "ts": 6345938469995.591, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938470190.522, "dur": 120.950, + "args": { + "External id": 981309,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 3900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938470361.770, "dur": 44.962, + "args": { + "External id": 981310,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 3901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938470426.310, "dur": 56.492, + "args": { + "External id": 981311,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 3902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938470497.615, "dur": 35.222, + "args": { + "External id": 981312,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938470539.724, "dur": 36.762, + "args": { + "External id": 981313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938470584.210, "dur": 32.175, + "args": { + "External id": 981314,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 3905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938470624.602, "dur": 32.686, + "args": { + "External id": 981315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 3906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938470687.559, "dur": 25.161, + "args": { + "External id": 981316,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 3907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938470733.590, "dur": 29.668, + "args": { + "External id": 981317,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938470790.855, "dur": 23.650, + "args": { + "External id": 981318,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938470831.412, "dur": 18.066, + "args": { + "External id": 981319,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938470858.352, "dur": 42.990, + "args": { + "External id": 981320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938470905.528, "dur": 36.185, + "args": { + "External id": 981321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938470974.914, "dur": 362.481, + "args": { + "External id": 981322,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938471127.078, "dur": 8.062, + "args": { + "External id": 981323,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938471137.908, "dur": 3.046, + "args": { + "External id": 981324,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938471142.310, "dur": 2.083, + "args": { + "External id": 981325,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938471145.524, "dur": 1.922, + "args": { + "External id": 981326,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938471197.009, "dur": 5.873, + "args": { + "External id": 981327,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938471199.227, "dur": 3.405, + "args": { + "External id": 981328,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938471204.797, "dur": 46.150, + "args": { + "External id": 981329,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938471214.060, "dur": 4.038, + "args": { + "External id": 981330,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938471252.624, "dur": 1.883, + "args": { + "External id": 981331,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938471253.707, "dur": 0.702, + "args": { + "External id": 981332,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 3923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938471255.785, "dur": 16.112, + "args": { + "External id": 981333,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 3924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938471258.190, "dur": 0.961, + "args": { + "External id": 981334,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 3925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938471383.783, "dur": 38.905, + "args": { + "External id": 981335,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938471445.681, "dur": 18.013, + "args": { + "External id": 981336,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938471472.930, "dur": 58.680, + "args": { + "External id": 981337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938471539.309, "dur": 43.073, + "args": { + "External id": 981338,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938471593.701, "dur": 26.240, + "args": { + "External id": 981339,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 3930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938471626.512, "dur": 35.035, + "args": { + "External id": 981340,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 3931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938471669.493, "dur": 31.699, + "args": { + "External id": 981341,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 3932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938471708.341, "dur": 33.421, + "args": { + "External id": 981342,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 3933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938471766.242, "dur": 25.355, + "args": { + "External id": 981343,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 3934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938471809.102, "dur": 26.250, + "args": { + "External id": 981344,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938471854.413, "dur": 20.539, + "args": { + "External id": 981345,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 3936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938471895.093, "dur": 16.677, + "args": { + "External id": 981346,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 3937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938471927.992, "dur": 18.610, + "args": { + "External id": 981347,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 3938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472049.256, "dur": 59.793, + "args": { + "External id": 981348,"Record function id": 0, "Ev Idx": 3939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472091.693, "dur": 15.550, + "args": { + "External id": 981349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472097.470, "dur": 8.023, + "args": { + "External id": 981350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472099.290, "dur": 5.852, + "args": { + "External id": 981351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472116.891, "dur": 5.984, + "args": { + "External id": 981352,"Record function id": 0, "Ev Idx": 3943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472119.615, "dur": 2.732, + "args": { + "External id": 981353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472120.393, "dur": 1.453, + "args": { + "External id": 981354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472120.798, "dur": 0.905, + "args": { + "External id": 981355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472127.023, "dur": 7.205, + "args": { + "External id": 981356,"Record function id": 0, "Ev Idx": 3947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472128.531, "dur": 5.137, + "args": { + "External id": 981357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472129.106, "dur": 4.077, + "args": { + "External id": 981358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472129.724, "dur": 3.377, + "args": { + "External id": 981359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 3950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472138.186, "dur": 5.237, + "args": { + "External id": 981360,"Record function id": 0, "Ev Idx": 3951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472139.716, "dur": 3.133, + "args": { + "External id": 981361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472140.289, "dur": 2.077, + "args": { + "External id": 981362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472141.377, "dur": 0.853, + "args": { + "External id": 981363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 3954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472147.283, "dur": 4.728, + "args": { + "External id": 981364,"Record function id": 0, "Ev Idx": 3955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472148.903, "dur": 2.580, + "args": { + "External id": 981365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472149.548, "dur": 1.362, + "args": { + "External id": 981366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472150.141, "dur": 0.663, + "args": { + "External id": 981367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472155.867, "dur": 4.719, + "args": { + "External id": 981368,"Record function id": 0, "Ev Idx": 3959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472157.457, "dur": 2.608, + "args": { + "External id": 981369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472158.121, "dur": 1.454, + "args": { + "External id": 981370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472158.667, "dur": 0.790, + "args": { + "External id": 981371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 3962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472164.606, "dur": 12.697, + "args": { + "External id": 981372,"Record function id": 0, "Ev Idx": 3963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472165.825, "dur": 10.937, + "args": { + "External id": 981373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472166.590, "dur": 9.543, + "args": { + "External id": 981374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472175.339, "dur": 0.688, + "args": { + "External id": 981375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472181.343, "dur": 4.546, + "args": { + "External id": 981376,"Record function id": 0, "Ev Idx": 3967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472182.753, "dur": 2.645, + "args": { + "External id": 981377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472183.468, "dur": 1.448, + "args": { + "External id": 981378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472184.041, "dur": 0.768, + "args": { + "External id": 981379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 3970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472189.733, "dur": 5.962, + "args": { + "External id": 981380,"Record function id": 0, "Ev Idx": 3971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938472190.863, "dur": 4.321, + "args": { + "External id": 981381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472191.469, "dur": 3.208, + "args": { + "External id": 981382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938472193.815, "dur": 0.723, + "args": { + "External id": 981383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 3974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938472200.228, "dur": 65606.692, + "args": { + "External id": 981384,"Record function id": 0, "Sequence number": 10552265, "Fwd thread id": 1, "Ev Idx": 3975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938472202.042, "dur": 65595.574, + "args": { + "External id": 981385,"Sequence number": 10552265, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 3976 + } + }, + { + "ph": "f", "id": 201, "pid": 2338708, "tid": 2379421, "ts": 6345938472202.042, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2338708, "tid": 2379421, + "ts": 6345938472237.577, "dur": 42.171, + "args": { + "External id": 981386,"Record function id": 0, "Ev Idx": 3977 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2338708, "tid": 2379421, + "ts": 6345938472288.716, "dur": 75.103, + "args": { + "External id": 981387,"Record function id": 0, "Ev Idx": 3978 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2338708, "tid": 2379421, + "ts": 6345938472371.199, "dur": 65417.546, + "args": { + "External id": 981388,"Record function id": 0, "Ev Idx": 3979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938472473.624, "dur": 9.152, + "args": { + "External id": 981389,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938472496.209, "dur": 7.694, + "args": { + "External id": 981390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 3981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938472519.741, "dur": 64313.093, + "args": { + "External id": 981391,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938472535.330, "dur": 64282.648, + "args": { + "External id": 981392,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 3983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938472635.050, "dur": 20.466, + "args": { + "External id": 981393,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938472678.077, "dur": 64088.481, + "args": { + "External id": 981394,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 3985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938472681.884, "dur": 64083.621, + "args": { + "External id": 981395,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 3986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938472687.215, "dur": 14.566, + "args": { + "External id": 981396,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938472704.172, "dur": 64055.581, + "args": { + "External id": 981397,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 3988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938536950.200, "dur": 13.165, + "args": { + "External id": 981398,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 3989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938536953.928, "dur": 8.892, + "args": { + "External id": 981399,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938536997.926, "dur": 461.167, + "args": { + "External id": 981400,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 3991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938537048.707, "dur": 404.076, + "args": { + "External id": 981401,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3992, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938537093.528, "dur": 353.421, + "args": { + "External id": 981402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 3993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938537487.341, "dur": 2.631, + "args": { + "External id": 981403,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3994, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938537562.025, "dur": 7.991, + "args": { + "External id": 981404,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938537619.498, "dur": 1.630, + "args": { + "External id": 981405,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938537637.698, "dur": 3.660, + "args": { + "External id": 981406,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938537653.821, "dur": 0.862, + "args": { + "External id": 981407,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938537666.740, "dur": 1.300, + "args": { + "External id": 981408,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 3999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938537679.648, "dur": 1.054, + "args": { + "External id": 981409,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938537694.538, "dur": 3.289, + "args": { + "External id": 981410,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938537709.651, "dur": 2.196, + "args": { + "External id": 981411,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938537722.274, "dur": 0.876, + "args": { + "External id": 981412,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938537823.555, "dur": 3415.238, + "args": { + "External id": 981413,"Record function id": 0, "Ev Idx": 4004 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2338708, "tid": 2379421, + "ts": 6345938537844.309, "dur": 1271.211, + "args": { + "External id": 981414,"Record function id": 0, "Ev Idx": 4005 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338708, "tid": 2379421, + "ts": 6345938537860.437, "dur": 434.187, + "args": { + "External id": 981415,"Record function id": 0, "Ev Idx": 4006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938537951.725, "dur": 4.909, + "args": { + "External id": 981416,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938537960.711, "dur": 0.853, + "args": { + "External id": 981417,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938537963.724, "dur": 2.747, + "args": { + "External id": 981418,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938537968.836, "dur": 0.864, + "args": { + "External id": 981419,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938537971.704, "dur": 0.720, + "args": { + "External id": 981420,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938537976.663, "dur": 0.782, + "args": { + "External id": 981421,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938537979.546, "dur": 2.083, + "args": { + "External id": 981422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938537983.208, "dur": 0.663, + "args": { + "External id": 981423,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938537985.630, "dur": 0.634, + "args": { + "External id": 981424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938537990.158, "dur": 0.664, + "args": { + "External id": 981425,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938538032.564, "dur": 223.644, + "args": { + "External id": 981426,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938538051.327, "dur": 198.231, + "args": { + "External id": 981427,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938538113.301, "dur": 18.907, + "args": { + "External id": 981428,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938538136.531, "dur": 76.115, + "args": { + "External id": 981429,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938538139.885, "dur": 72.307, + "args": { + "External id": 981430,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538144.791, "dur": 6.980, + "args": { + "External id": 981431,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938538153.948, "dur": 57.574, + "args": { + "External id": 981432,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4023 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2338708, "tid": 2379421, + "ts": 6345938538386.851, "dur": 717.725, + "args": { + "External id": 981433,"Record function id": 0, "Ev Idx": 4024 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338708, "tid": 2379421, + "ts": 6345938538406.021, "dur": 640.960, + "args": { + "External id": 981434,"Record function id": 0, "Ev Idx": 4025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938538469.203, "dur": 6.202, + "args": { + "External id": 981435,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938538493.782, "dur": 37.617, + "args": { + "External id": 981436,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538499.660, "dur": 1.823, + "args": { + "External id": 981437,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538503.923, "dur": 2.240, + "args": { + "External id": 981438,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538508.212, "dur": 0.318, + "args": { + "External id": 981439,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538510.226, "dur": 0.365, + "args": { + "External id": 981440,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538513.953, "dur": 0.412, + "args": { + "External id": 981441,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538515.868, "dur": 3.088, + "args": { + "External id": 981442,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538520.794, "dur": 0.481, + "args": { + "External id": 981443,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538523.489, "dur": 0.546, + "args": { + "External id": 981444,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538525.726, "dur": 0.407, + "args": { + "External id": 981445,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938538543.837, "dur": 51.110, + "args": { + "External id": 981446,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938538629.139, "dur": 134.141, + "args": { + "External id": 981447,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938538640.670, "dur": 3.826, + "args": { + "External id": 981448,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938538650.753, "dur": 11.478, + "args": { + "External id": 981449,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938538655.597, "dur": 6.184, + "args": { + "External id": 981450,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538659.744, "dur": 0.707, + "args": { + "External id": 981451,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938538670.656, "dur": 27.734, + "args": { + "External id": 981452,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538673.571, "dur": 0.418, + "args": { + "External id": 981453,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538676.511, "dur": 0.676, + "args": { + "External id": 981454,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538678.664, "dur": 2.657, + "args": { + "External id": 981455,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538683.329, "dur": 0.700, + "args": { + "External id": 981456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538685.574, "dur": 0.266, + "args": { + "External id": 981457,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538687.415, "dur": 0.571, + "args": { + "External id": 981458,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538690.189, "dur": 0.465, + "args": { + "External id": 981459,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538692.160, "dur": 0.360, + "args": { + "External id": 981460,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938538694.104, "dur": 0.357, + "args": { + "External id": 981461,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938538717.906, "dur": 36.679, + "args": { + "External id": 981462,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938538813.233, "dur": 136.862, + "args": { + "External id": 981463,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938538849.325, "dur": 97.102, + "args": { + "External id": 981464,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4055, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938538860.095, "dur": 81.685, + "args": { + "External id": 981465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938538969.094, "dur": 2.066, + "args": { + "External id": 981466,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4057, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938539124.455, "dur": 2088.275, + "args": { + "External id": 981467,"Sequence number": 10552264, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4058 + } + }, + { + "ph": "f", "id": 202, "pid": 2338708, "tid": 2379421, "ts": 6345938539124.455, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938539254.048, "dur": 124.043, + "args": { + "External id": 981468,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938539425.575, "dur": 46.571, + "args": { + "External id": 981469,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938539492.201, "dur": 57.854, + "args": { + "External id": 981470,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938539563.649, "dur": 37.766, + "args": { + "External id": 981471,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938539609.120, "dur": 37.563, + "args": { + "External id": 981472,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938539654.825, "dur": 31.352, + "args": { + "External id": 981473,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938539695.656, "dur": 32.655, + "args": { + "External id": 981474,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938539759.339, "dur": 27.988, + "args": { + "External id": 981475,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938539808.750, "dur": 33.849, + "args": { + "External id": 981476,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938539871.415, "dur": 23.548, + "args": { + "External id": 981477,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938539909.380, "dur": 18.186, + "args": { + "External id": 981478,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938539937.656, "dur": 43.346, + "args": { + "External id": 981479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938539984.913, "dur": 59.219, + "args": { + "External id": 981480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938540137.447, "dur": 304.886, + "args": { + "External id": 981481,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938540228.879, "dur": 7.261, + "args": { + "External id": 981482,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938540238.849, "dur": 3.916, + "args": { + "External id": 981483,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938540244.540, "dur": 2.069, + "args": { + "External id": 981484,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938540248.340, "dur": 1.779, + "args": { + "External id": 981485,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938540303.950, "dur": 5.375, + "args": { + "External id": 981486,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938540306.127, "dur": 3.014, + "args": { + "External id": 981487,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938540311.341, "dur": 41.364, + "args": { + "External id": 981488,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938540319.976, "dur": 3.862, + "args": { + "External id": 981489,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938540354.518, "dur": 2.187, + "args": { + "External id": 981490,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938540355.890, "dur": 0.718, + "args": { + "External id": 981491,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938540358.091, "dur": 20.290, + "args": { + "External id": 981492,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938540360.890, "dur": 1.893, + "args": { + "External id": 981493,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938540490.859, "dur": 39.140, + "args": { + "External id": 981494,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938540551.430, "dur": 17.470, + "args": { + "External id": 981495,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938540577.902, "dur": 61.387, + "args": { + "External id": 981496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938540646.552, "dur": 49.577, + "args": { + "External id": 981497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938540707.372, "dur": 26.094, + "args": { + "External id": 981498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938540740.395, "dur": 39.684, + "args": { + "External id": 981499,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938540788.876, "dur": 31.022, + "args": { + "External id": 981500,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938540826.993, "dur": 33.038, + "args": { + "External id": 981501,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938540882.937, "dur": 26.161, + "args": { + "External id": 981502,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938540928.343, "dur": 27.012, + "args": { + "External id": 981503,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938540973.563, "dur": 18.865, + "args": { + "External id": 981504,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938541109.880, "dur": 24.300, + "args": { + "External id": 981505,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938541158.215, "dur": 19.768, + "args": { + "External id": 981506,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541264.581, "dur": 16.611, + "args": { + "External id": 981507,"Record function id": 0, "Ev Idx": 4098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541267.985, "dur": 12.225, + "args": { + "External id": 981508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541272.799, "dur": 6.299, + "args": { + "External id": 981509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541274.454, "dur": 4.507, + "args": { + "External id": 981510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541285.769, "dur": 8.709, + "args": { + "External id": 981511,"Record function id": 0, "Ev Idx": 4102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541287.488, "dur": 6.450, + "args": { + "External id": 981512,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541288.319, "dur": 5.069, + "args": { + "External id": 981513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541292.479, "dur": 0.798, + "args": { + "External id": 981514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541298.654, "dur": 7.226, + "args": { + "External id": 981515,"Record function id": 0, "Ev Idx": 4106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541300.046, "dur": 5.275, + "args": { + "External id": 981516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541300.658, "dur": 4.143, + "args": { + "External id": 981517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541301.681, "dur": 3.006, + "args": { + "External id": 981518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541309.714, "dur": 4.867, + "args": { + "External id": 981519,"Record function id": 0, "Ev Idx": 4110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541311.071, "dur": 2.947, + "args": { + "External id": 981520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541311.881, "dur": 1.580, + "args": { + "External id": 981521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541312.465, "dur": 0.910, + "args": { + "External id": 981522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541318.234, "dur": 4.323, + "args": { + "External id": 981523,"Record function id": 0, "Ev Idx": 4114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541319.793, "dur": 2.232, + "args": { + "External id": 981524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541320.325, "dur": 1.177, + "args": { + "External id": 981525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541320.683, "dur": 0.734, + "args": { + "External id": 981526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541326.319, "dur": 4.165, + "args": { + "External id": 981527,"Record function id": 0, "Ev Idx": 4118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541327.681, "dur": 2.333, + "args": { + "External id": 981528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541328.336, "dur": 1.218, + "args": { + "External id": 981529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541328.769, "dur": 0.693, + "args": { + "External id": 981530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541334.425, "dur": 4.421, + "args": { + "External id": 981531,"Record function id": 0, "Ev Idx": 4122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541336.014, "dur": 2.329, + "args": { + "External id": 981532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541336.669, "dur": 1.174, + "args": { + "External id": 981533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541337.033, "dur": 0.721, + "args": { + "External id": 981534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541342.637, "dur": 6.564, + "args": { + "External id": 981535,"Record function id": 0, "Ev Idx": 4126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541344.015, "dur": 4.680, + "args": { + "External id": 981536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541344.545, "dur": 3.685, + "args": { + "External id": 981537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541347.384, "dur": 0.756, + "args": { + "External id": 981538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541352.921, "dur": 4.164, + "args": { + "External id": 981539,"Record function id": 0, "Ev Idx": 4130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938541354.319, "dur": 2.282, + "args": { + "External id": 981540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541354.935, "dur": 1.186, + "args": { + "External id": 981541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938541355.333, "dur": 0.698, + "args": { + "External id": 981542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938541362.226, "dur": 63606.220, + "args": { + "External id": 981543,"Record function id": 0, "Sequence number": 10552263, "Fwd thread id": 1, "Ev Idx": 4134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938541364.528, "dur": 63592.657, + "args": { + "External id": 981544,"Sequence number": 10552263, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4135 + } + }, + { + "ph": "f", "id": 203, "pid": 2338708, "tid": 2379421, "ts": 6345938541364.528, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2338708, "tid": 2379421, + "ts": 6345938541398.044, "dur": 44.834, + "args": { + "External id": 981545,"Record function id": 0, "Ev Idx": 4136 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2338708, "tid": 2379421, + "ts": 6345938541452.656, "dur": 75.024, + "args": { + "External id": 981546,"Record function id": 0, "Ev Idx": 4137 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2338708, "tid": 2379421, + "ts": 6345938541535.152, "dur": 63412.754, + "args": { + "External id": 981547,"Record function id": 0, "Ev Idx": 4138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938541636.960, "dur": 8.741, + "args": { + "External id": 981548,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938541657.393, "dur": 7.340, + "args": { + "External id": 981549,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938541680.828, "dur": 62307.550, + "args": { + "External id": 981550,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938541696.804, "dur": 62276.983, + "args": { + "External id": 981551,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938541797.484, "dur": 18.742, + "args": { + "External id": 981552,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938541841.403, "dur": 62080.513, + "args": { + "External id": 981553,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938541847.096, "dur": 62073.762, + "args": { + "External id": 981554,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938541852.544, "dur": 10.152, + "args": { + "External id": 981555,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938541865.023, "dur": 62049.996, + "args": { + "External id": 981556,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938604159.587, "dur": 14.333, + "args": { + "External id": 981557,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938604163.860, "dur": 9.355, + "args": { + "External id": 981558,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938604210.236, "dur": 409.211, + "args": { + "External id": 981559,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938604247.958, "dur": 365.475, + "args": { + "External id": 981560,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4151, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938604261.169, "dur": 346.257, + "args": { + "External id": 981561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938604644.563, "dur": 2.561, + "args": { + "External id": 981562,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4153, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938604715.648, "dur": 7.503, + "args": { + "External id": 981563,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938604772.043, "dur": 1.718, + "args": { + "External id": 981564,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938604790.495, "dur": 3.852, + "args": { + "External id": 981565,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938604807.418, "dur": 0.916, + "args": { + "External id": 981566,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938604822.787, "dur": 1.028, + "args": { + "External id": 981567,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938604835.209, "dur": 1.143, + "args": { + "External id": 981568,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938604848.281, "dur": 3.075, + "args": { + "External id": 981569,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938604862.840, "dur": 2.293, + "args": { + "External id": 981570,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938604876.280, "dur": 0.636, + "args": { + "External id": 981571,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938604985.868, "dur": 3383.971, + "args": { + "External id": 981572,"Record function id": 0, "Ev Idx": 4163 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2338708, "tid": 2379421, + "ts": 6345938605033.942, "dur": 1279.384, + "args": { + "External id": 981573,"Record function id": 0, "Ev Idx": 4164 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338708, "tid": 2379421, + "ts": 6345938605092.362, "dur": 396.129, + "args": { + "External id": 981574,"Record function id": 0, "Ev Idx": 4165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938605202.213, "dur": 5.951, + "args": { + "External id": 981575,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938605212.112, "dur": 0.719, + "args": { + "External id": 981576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938605214.999, "dur": 3.291, + "args": { + "External id": 981577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938605220.585, "dur": 0.755, + "args": { + "External id": 981578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938605223.130, "dur": 0.974, + "args": { + "External id": 981579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938605226.039, "dur": 0.650, + "args": { + "External id": 981580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938605228.476, "dur": 1.916, + "args": { + "External id": 981581,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938605234.198, "dur": 0.745, + "args": { + "External id": 981582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938605240.883, "dur": 0.634, + "args": { + "External id": 981583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938605243.221, "dur": 0.540, + "args": { + "External id": 981584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938605266.280, "dur": 184.701, + "args": { + "External id": 981585,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938605285.357, "dur": 160.207, + "args": { + "External id": 981586,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938605304.189, "dur": 19.282, + "args": { + "External id": 981587,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938605330.492, "dur": 80.632, + "args": { + "External id": 981588,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938605333.507, "dur": 77.185, + "args": { + "External id": 981589,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605338.195, "dur": 6.188, + "args": { + "External id": 981590,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938605346.422, "dur": 63.304, + "args": { + "External id": 981591,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4182 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2338708, "tid": 2379421, + "ts": 6345938605578.184, "dur": 726.281, + "args": { + "External id": 981592,"Record function id": 0, "Ev Idx": 4183 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338708, "tid": 2379421, + "ts": 6345938605598.043, "dur": 692.073, + "args": { + "External id": 981593,"Record function id": 0, "Ev Idx": 4184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938605657.780, "dur": 5.601, + "args": { + "External id": 981594,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938605680.865, "dur": 35.280, + "args": { + "External id": 981595,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605686.385, "dur": 1.993, + "args": { + "External id": 981596,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605690.821, "dur": 1.806, + "args": { + "External id": 981597,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605694.414, "dur": 0.553, + "args": { + "External id": 981598,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605696.729, "dur": 0.410, + "args": { + "External id": 981599,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605700.269, "dur": 0.447, + "args": { + "External id": 981600,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605702.008, "dur": 2.317, + "args": { + "External id": 981601,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605706.000, "dur": 0.381, + "args": { + "External id": 981602,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605708.795, "dur": 0.277, + "args": { + "External id": 981603,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605710.750, "dur": 0.592, + "args": { + "External id": 981604,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938605727.802, "dur": 51.869, + "args": { + "External id": 981605,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938605816.527, "dur": 123.405, + "args": { + "External id": 981606,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938605827.326, "dur": 4.289, + "args": { + "External id": 981607,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938605837.349, "dur": 11.737, + "args": { + "External id": 981608,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938605842.397, "dur": 6.190, + "args": { + "External id": 981609,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605846.632, "dur": 0.613, + "args": { + "External id": 981610,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938605856.672, "dur": 27.452, + "args": { + "External id": 981611,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605859.478, "dur": 0.627, + "args": { + "External id": 981612,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605862.688, "dur": 0.469, + "args": { + "External id": 981613,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605864.903, "dur": 2.769, + "args": { + "External id": 981614,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605868.990, "dur": 1.202, + "args": { + "External id": 981615,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605871.632, "dur": 0.309, + "args": { + "External id": 981616,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605873.421, "dur": 0.405, + "args": { + "External id": 981617,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605876.013, "dur": 0.292, + "args": { + "External id": 981618,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605877.839, "dur": 0.274, + "args": { + "External id": 981619,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938605879.631, "dur": 0.388, + "args": { + "External id": 981620,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938605894.972, "dur": 36.047, + "args": { + "External id": 981621,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938605990.045, "dur": 207.005, + "args": { + "External id": 981622,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938606042.659, "dur": 149.857, + "args": { + "External id": 981623,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4214, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938606098.763, "dur": 88.765, + "args": { + "External id": 981624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938606222.096, "dur": 2.414, + "args": { + "External id": 981625,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4216, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938606321.762, "dur": 2025.011, + "args": { + "External id": 981626,"Sequence number": 10552262, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4217 + } + }, + { + "ph": "f", "id": 204, "pid": 2338708, "tid": 2379421, "ts": 6345938606321.762, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938606448.174, "dur": 118.176, + "args": { + "External id": 981627,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938606612.700, "dur": 46.088, + "args": { + "External id": 981628,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938606677.858, "dur": 56.848, + "args": { + "External id": 981629,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938606749.212, "dur": 36.339, + "args": { + "External id": 981630,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938606793.004, "dur": 36.872, + "args": { + "External id": 981631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938606837.295, "dur": 31.759, + "args": { + "External id": 981632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938606876.903, "dur": 33.107, + "args": { + "External id": 981633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938606939.467, "dur": 26.750, + "args": { + "External id": 981634,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938606987.273, "dur": 52.861, + "args": { + "External id": 981635,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938607112.647, "dur": 27.404, + "args": { + "External id": 981636,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938607158.403, "dur": 18.903, + "args": { + "External id": 981637,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938607186.164, "dur": 48.398, + "args": { + "External id": 981638,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938607239.042, "dur": 37.137, + "args": { + "External id": 981639,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938607312.271, "dur": 275.053, + "args": { + "External id": 981640,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938607400.898, "dur": 7.245, + "args": { + "External id": 981641,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938607410.522, "dur": 3.263, + "args": { + "External id": 981642,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938607415.363, "dur": 2.092, + "args": { + "External id": 981643,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938607418.975, "dur": 1.727, + "args": { + "External id": 981644,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938607466.398, "dur": 6.667, + "args": { + "External id": 981645,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938607468.370, "dur": 4.347, + "args": { + "External id": 981646,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938607477.466, "dur": 37.192, + "args": { + "External id": 981647,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938607483.864, "dur": 4.558, + "args": { + "External id": 981648,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938607516.915, "dur": 2.589, + "args": { + "External id": 981649,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938607518.724, "dur": 0.663, + "args": { + "External id": 981650,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938607520.928, "dur": 17.796, + "args": { + "External id": 981651,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938607524.734, "dur": 0.485, + "args": { + "External id": 981652,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938607649.960, "dur": 36.684, + "args": { + "External id": 981653,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938607709.003, "dur": 18.746, + "args": { + "External id": 981654,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938607736.624, "dur": 48.711, + "args": { + "External id": 981655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938607792.938, "dur": 48.309, + "args": { + "External id": 981656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938607852.986, "dur": 28.605, + "args": { + "External id": 981657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938607887.793, "dur": 36.277, + "args": { + "External id": 981658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938607932.823, "dur": 33.127, + "args": { + "External id": 981659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938607973.017, "dur": 34.130, + "args": { + "External id": 981660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938608093.802, "dur": 33.880, + "args": { + "External id": 981661,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938608148.879, "dur": 29.416, + "args": { + "External id": 981662,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938608199.630, "dur": 20.714, + "args": { + "External id": 981663,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938608255.461, "dur": 18.636, + "args": { + "External id": 981664,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938608293.678, "dur": 17.970, + "args": { + "External id": 981665,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608394.653, "dur": 22.947, + "args": { + "External id": 981666,"Record function id": 0, "Ev Idx": 4257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608398.839, "dur": 17.696, + "args": { + "External id": 981667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608407.457, "dur": 7.875, + "args": { + "External id": 981668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608408.952, "dur": 6.254, + "args": { + "External id": 981669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608422.222, "dur": 5.637, + "args": { + "External id": 981670,"Record function id": 0, "Ev Idx": 4261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608424.246, "dur": 3.061, + "args": { + "External id": 981671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608425.113, "dur": 1.577, + "args": { + "External id": 981672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608425.716, "dur": 0.802, + "args": { + "External id": 981673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608431.778, "dur": 7.458, + "args": { + "External id": 981674,"Record function id": 0, "Ev Idx": 4265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608433.256, "dur": 5.407, + "args": { + "External id": 981675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608433.866, "dur": 4.255, + "args": { + "External id": 981676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608434.697, "dur": 3.305, + "args": { + "External id": 981677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608443.180, "dur": 4.491, + "args": { + "External id": 981678,"Record function id": 0, "Ev Idx": 4269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608444.766, "dur": 2.383, + "args": { + "External id": 981679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608445.435, "dur": 1.198, + "args": { + "External id": 981680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608445.773, "dur": 0.773, + "args": { + "External id": 981681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608451.367, "dur": 4.685, + "args": { + "External id": 981682,"Record function id": 0, "Ev Idx": 4273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608453.060, "dur": 2.490, + "args": { + "External id": 981683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608453.648, "dur": 1.384, + "args": { + "External id": 981684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608454.203, "dur": 0.740, + "args": { + "External id": 981685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608459.843, "dur": 8.356, + "args": { + "External id": 981686,"Record function id": 0, "Ev Idx": 4277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608461.097, "dur": 3.060, + "args": { + "External id": 981687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608462.115, "dur": 1.535, + "args": { + "External id": 981688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608462.892, "dur": 0.670, + "args": { + "External id": 981689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608472.190, "dur": 4.946, + "args": { + "External id": 981690,"Record function id": 0, "Ev Idx": 4281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608474.154, "dur": 2.489, + "args": { + "External id": 981691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608474.709, "dur": 1.411, + "args": { + "External id": 981692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608475.274, "dur": 0.756, + "args": { + "External id": 981693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608480.911, "dur": 4.618, + "args": { + "External id": 981694,"Record function id": 0, "Ev Idx": 4285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608482.287, "dur": 2.760, + "args": { + "External id": 981695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608482.960, "dur": 1.592, + "args": { + "External id": 981696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608483.626, "dur": 0.851, + "args": { + "External id": 981697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608489.301, "dur": 6.750, + "args": { + "External id": 981698,"Record function id": 0, "Ev Idx": 4289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938608490.802, "dur": 4.775, + "args": { + "External id": 981699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608491.300, "dur": 3.787, + "args": { + "External id": 981700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938608494.274, "dur": 0.732, + "args": { + "External id": 981701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938608501.120, "dur": 65116.899, + "args": { + "External id": 981702,"Record function id": 0, "Sequence number": 10552261, "Fwd thread id": 1, "Ev Idx": 4293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938608503.538, "dur": 65103.966, + "args": { + "External id": 981703,"Sequence number": 10552261, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4294 + } + }, + { + "ph": "f", "id": 205, "pid": 2338708, "tid": 2379421, "ts": 6345938608503.538, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2338708, "tid": 2379421, + "ts": 6345938608537.013, "dur": 42.922, + "args": { + "External id": 981704,"Record function id": 0, "Ev Idx": 4295 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2338708, "tid": 2379421, + "ts": 6345938608588.947, "dur": 74.034, + "args": { + "External id": 981705,"Record function id": 0, "Ev Idx": 4296 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2338708, "tid": 2379421, + "ts": 6345938608672.842, "dur": 64925.399, + "args": { + "External id": 981706,"Record function id": 0, "Ev Idx": 4297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938608776.622, "dur": 7.739, + "args": { + "External id": 981707,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938608795.266, "dur": 7.755, + "args": { + "External id": 981708,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938608820.064, "dur": 63759.337, + "args": { + "External id": 981709,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938608836.758, "dur": 63727.839, + "args": { + "External id": 981710,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938608938.720, "dur": 20.240, + "args": { + "External id": 981711,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938608981.759, "dur": 63531.104, + "args": { + "External id": 981712,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938608984.608, "dur": 63526.801, + "args": { + "External id": 981713,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938608990.202, "dur": 11.620, + "args": { + "External id": 981714,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938609006.862, "dur": 63499.211, + "args": { + "External id": 981715,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938672697.476, "dur": 12.962, + "args": { + "External id": 981716,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938672701.343, "dur": 8.578, + "args": { + "External id": 981717,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938672744.863, "dur": 508.458, + "args": { + "External id": 981718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938672782.076, "dur": 464.258, + "args": { + "External id": 981719,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4310, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938672795.791, "dur": 443.230, + "args": { + "External id": 981720,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938673284.842, "dur": 2.771, + "args": { + "External id": 981721,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4312, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938673365.368, "dur": 7.826, + "args": { + "External id": 981722,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938673423.903, "dur": 1.383, + "args": { + "External id": 981723,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938673443.215, "dur": 3.857, + "args": { + "External id": 981724,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938673460.093, "dur": 1.111, + "args": { + "External id": 981725,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938673472.769, "dur": 1.136, + "args": { + "External id": 981726,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938673484.975, "dur": 0.842, + "args": { + "External id": 981727,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938673499.976, "dur": 3.346, + "args": { + "External id": 981728,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938673515.390, "dur": 2.721, + "args": { + "External id": 981729,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938673528.774, "dur": 1.085, + "args": { + "External id": 981730,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938673634.433, "dur": 3276.861, + "args": { + "External id": 981731,"Record function id": 0, "Ev Idx": 4322 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2338708, "tid": 2379421, + "ts": 6345938673657.221, "dur": 1235.624, + "args": { + "External id": 981732,"Record function id": 0, "Ev Idx": 4323 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338708, "tid": 2379421, + "ts": 6345938673675.493, "dur": 433.122, + "args": { + "External id": 981733,"Record function id": 0, "Ev Idx": 4324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938673769.384, "dur": 5.279, + "args": { + "External id": 981734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938673778.653, "dur": 0.933, + "args": { + "External id": 981735,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938673781.632, "dur": 3.588, + "args": { + "External id": 981736,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938673787.511, "dur": 0.815, + "args": { + "External id": 981737,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938673792.261, "dur": 1.030, + "args": { + "External id": 981738,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938673794.920, "dur": 0.669, + "args": { + "External id": 981739,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938673797.403, "dur": 2.007, + "args": { + "External id": 981740,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938673801.220, "dur": 0.790, + "args": { + "External id": 981741,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938673805.767, "dur": 0.786, + "args": { + "External id": 981742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938673808.324, "dur": 1.085, + "args": { + "External id": 981743,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938673830.208, "dur": 176.775, + "args": { + "External id": 981744,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938673848.459, "dur": 153.470, + "args": { + "External id": 981745,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938673870.763, "dur": 19.160, + "args": { + "External id": 981746,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938673893.908, "dur": 76.561, + "args": { + "External id": 981747,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938673896.974, "dur": 73.054, + "args": { + "External id": 981748,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938673901.673, "dur": 6.860, + "args": { + "External id": 981749,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938673913.112, "dur": 56.267, + "args": { + "External id": 981750,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2338708, "tid": 2379421, + "ts": 6345938674209.412, "dur": 674.709, + "args": { + "External id": 981751,"Record function id": 0, "Ev Idx": 4342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338708, "tid": 2379421, + "ts": 6345938674231.041, "dur": 639.297, + "args": { + "External id": 981752,"Record function id": 0, "Ev Idx": 4343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938674298.375, "dur": 6.910, + "args": { + "External id": 981753,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938674324.148, "dur": 38.661, + "args": { + "External id": 981754,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674329.861, "dur": 3.764, + "args": { + "External id": 981755,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674335.914, "dur": 0.687, + "args": { + "External id": 981756,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674338.593, "dur": 0.399, + "args": { + "External id": 981757,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674341.886, "dur": 0.300, + "args": { + "External id": 981758,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674343.483, "dur": 0.776, + "args": { + "External id": 981759,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674345.744, "dur": 2.592, + "args": { + "External id": 981760,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674351.478, "dur": 0.404, + "args": { + "External id": 981761,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674353.370, "dur": 0.501, + "args": { + "External id": 981762,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674355.277, "dur": 1.746, + "args": { + "External id": 981763,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938674375.866, "dur": 53.051, + "args": { + "External id": 981764,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938674465.831, "dur": 137.177, + "args": { + "External id": 981765,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938674477.402, "dur": 4.257, + "args": { + "External id": 981766,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938674487.758, "dur": 11.631, + "args": { + "External id": 981767,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938674492.494, "dur": 6.402, + "args": { + "External id": 981768,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674496.655, "dur": 0.709, + "args": { + "External id": 981769,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938674507.783, "dur": 33.440, + "args": { + "External id": 981770,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674510.399, "dur": 0.424, + "args": { + "External id": 981771,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674513.260, "dur": 0.318, + "args": { + "External id": 981772,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674518.835, "dur": 3.179, + "args": { + "External id": 981773,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674523.608, "dur": 0.587, + "args": { + "External id": 981774,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674526.067, "dur": 0.657, + "args": { + "External id": 981775,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674529.524, "dur": 0.354, + "args": { + "External id": 981776,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674531.812, "dur": 0.529, + "args": { + "External id": 981777,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674534.157, "dur": 0.395, + "args": { + "External id": 981778,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938674536.834, "dur": 0.355, + "args": { + "External id": 981779,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938674556.007, "dur": 38.255, + "args": { + "External id": 981780,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938674653.663, "dur": 138.630, + "args": { + "External id": 981781,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938674689.529, "dur": 98.556, + "args": { + "External id": 981782,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4373, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938674700.707, "dur": 82.749, + "args": { + "External id": 981783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938674812.294, "dur": 2.307, + "args": { + "External id": 981784,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4375, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938674901.928, "dur": 1987.215, + "args": { + "External id": 981785,"Sequence number": 10552260, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4376 + } + }, + { + "ph": "f", "id": 206, "pid": 2338708, "tid": 2379421, "ts": 6345938674901.928, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938675050.767, "dur": 159.790, + "args": { + "External id": 981786,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938675273.575, "dur": 44.912, + "args": { + "External id": 981787,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938675339.143, "dur": 55.829, + "args": { + "External id": 981788,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938675409.858, "dur": 36.826, + "args": { + "External id": 981789,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938675454.245, "dur": 35.704, + "args": { + "External id": 981790,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938675497.599, "dur": 30.924, + "args": { + "External id": 981791,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938675535.885, "dur": 33.810, + "args": { + "External id": 981792,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938675602.272, "dur": 24.985, + "args": { + "External id": 981793,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938675649.359, "dur": 31.418, + "args": { + "External id": 981794,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938675706.927, "dur": 22.436, + "args": { + "External id": 981795,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938675745.619, "dur": 18.041, + "args": { + "External id": 981796,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938675772.147, "dur": 42.185, + "args": { + "External id": 981797,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938675818.238, "dur": 36.545, + "args": { + "External id": 981798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938675889.850, "dur": 353.259, + "args": { + "External id": 981799,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938675977.640, "dur": 6.760, + "args": { + "External id": 981800,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938675986.934, "dur": 3.076, + "args": { + "External id": 981801,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938675991.423, "dur": 1.710, + "args": { + "External id": 981802,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938675994.988, "dur": 2.442, + "args": { + "External id": 981803,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938676107.234, "dur": 6.605, + "args": { + "External id": 981804,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938676109.567, "dur": 3.809, + "args": { + "External id": 981805,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938676115.943, "dur": 38.475, + "args": { + "External id": 981806,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938676123.179, "dur": 3.982, + "args": { + "External id": 981807,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938676156.147, "dur": 2.072, + "args": { + "External id": 981808,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938676157.181, "dur": 0.939, + "args": { + "External id": 981809,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938676159.522, "dur": 16.017, + "args": { + "External id": 981810,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938676161.747, "dur": 0.714, + "args": { + "External id": 981811,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938676292.559, "dur": 38.426, + "args": { + "External id": 981812,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938676353.492, "dur": 17.748, + "args": { + "External id": 981813,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938676380.221, "dur": 60.903, + "args": { + "External id": 981814,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938676448.871, "dur": 43.293, + "args": { + "External id": 981815,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938676504.121, "dur": 24.361, + "args": { + "External id": 981816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938676535.590, "dur": 34.309, + "args": { + "External id": 981817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938676577.990, "dur": 30.298, + "args": { + "External id": 981818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938676616.099, "dur": 33.845, + "args": { + "External id": 981819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938676674.109, "dur": 26.802, + "args": { + "External id": 981820,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938676719.469, "dur": 27.963, + "args": { + "External id": 981821,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938676766.115, "dur": 18.787, + "args": { + "External id": 981822,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938676807.189, "dur": 15.717, + "args": { + "External id": 981823,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938676837.877, "dur": 17.683, + "args": { + "External id": 981824,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938676936.449, "dur": 16.326, + "args": { + "External id": 981825,"Record function id": 0, "Ev Idx": 4416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938676940.020, "dur": 11.844, + "args": { + "External id": 981826,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938676944.948, "dur": 5.832, + "args": { + "External id": 981827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938676946.709, "dur": 3.940, + "args": { + "External id": 981828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938676957.627, "dur": 6.127, + "args": { + "External id": 981829,"Record function id": 0, "Ev Idx": 4420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938676959.403, "dur": 3.673, + "args": { + "External id": 981830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938676960.753, "dur": 1.777, + "args": { + "External id": 981831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938676961.490, "dur": 0.911, + "args": { + "External id": 981832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938676967.636, "dur": 7.836, + "args": { + "External id": 981833,"Record function id": 0, "Ev Idx": 4424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938676969.443, "dur": 5.564, + "args": { + "External id": 981834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938676970.093, "dur": 4.454, + "args": { + "External id": 981835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938676970.950, "dur": 3.499, + "args": { + "External id": 981836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938676979.757, "dur": 4.866, + "args": { + "External id": 981837,"Record function id": 0, "Ev Idx": 4428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938676981.084, "dur": 3.050, + "args": { + "External id": 981838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938676982.047, "dur": 1.622, + "args": { + "External id": 981839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938676982.720, "dur": 0.842, + "args": { + "External id": 981840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938676988.376, "dur": 4.611, + "args": { + "External id": 981841,"Record function id": 0, "Ev Idx": 4432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938676989.742, "dur": 2.752, + "args": { + "External id": 981842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938676990.430, "dur": 1.560, + "args": { + "External id": 981843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938676990.852, "dur": 1.051, + "args": { + "External id": 981844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938676996.663, "dur": 4.536, + "args": { + "External id": 981845,"Record function id": 0, "Ev Idx": 4436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938676998.213, "dur": 2.481, + "args": { + "External id": 981846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938676998.743, "dur": 1.495, + "args": { + "External id": 981847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938676999.133, "dur": 1.018, + "args": { + "External id": 981848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938677005.287, "dur": 28.566, + "args": { + "External id": 981849,"Record function id": 0, "Ev Idx": 4440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938677006.783, "dur": 25.867, + "args": { + "External id": 981850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938677025.956, "dur": 5.600, + "args": { + "External id": 981851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938677029.457, "dur": 1.799, + "args": { + "External id": 981852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938677040.070, "dur": 5.405, + "args": { + "External id": 981853,"Record function id": 0, "Ev Idx": 4444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938677041.750, "dur": 3.206, + "args": { + "External id": 981854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938677042.950, "dur": 1.540, + "args": { + "External id": 981855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938677043.632, "dur": 0.767, + "args": { + "External id": 981856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938677049.376, "dur": 47.119, + "args": { + "External id": 981857,"Record function id": 0, "Ev Idx": 4448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938677051.107, "dur": 43.748, + "args": { + "External id": 981858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938677051.772, "dur": 41.864, + "args": { + "External id": 981859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938677091.510, "dur": 1.807, + "args": { + "External id": 981860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938677103.971, "dur": 64804.874, + "args": { + "External id": 981861,"Record function id": 0, "Sequence number": 10552259, "Fwd thread id": 1, "Ev Idx": 4452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938677106.347, "dur": 64791.766, + "args": { + "External id": 981862,"Sequence number": 10552259, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4453 + } + }, + { + "ph": "f", "id": 207, "pid": 2338708, "tid": 2379421, "ts": 6345938677106.347, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2338708, "tid": 2379421, + "ts": 6345938677142.277, "dur": 42.925, + "args": { + "External id": 981863,"Record function id": 0, "Ev Idx": 4454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2338708, "tid": 2379421, + "ts": 6345938677194.516, "dur": 76.574, + "args": { + "External id": 981864,"Record function id": 0, "Ev Idx": 4455 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2338708, "tid": 2379421, + "ts": 6345938677278.463, "dur": 64610.446, + "args": { + "External id": 981865,"Record function id": 0, "Ev Idx": 4456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938677380.862, "dur": 8.584, + "args": { + "External id": 981866,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938677400.456, "dur": 7.698, + "args": { + "External id": 981867,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938677427.375, "dur": 63508.992, + "args": { + "External id": 981868,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938677442.976, "dur": 63479.043, + "args": { + "External id": 981869,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938677553.431, "dur": 18.660, + "args": { + "External id": 981870,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938677594.888, "dur": 63277.590, + "args": { + "External id": 981871,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938677598.145, "dur": 63273.258, + "args": { + "External id": 981872,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938677604.472, "dur": 10.347, + "args": { + "External id": 981873,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938677617.103, "dur": 63248.992, + "args": { + "External id": 981874,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938741097.756, "dur": 14.089, + "args": { + "External id": 981875,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938741101.626, "dur": 9.526, + "args": { + "External id": 981876,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938741147.702, "dur": 414.641, + "args": { + "External id": 981877,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938741185.210, "dur": 370.551, + "args": { + "External id": 981878,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4469, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938741199.546, "dur": 348.364, + "args": { + "External id": 981879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938741587.559, "dur": 2.895, + "args": { + "External id": 981880,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4471, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938741658.478, "dur": 7.931, + "args": { + "External id": 981881,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938741716.671, "dur": 1.677, + "args": { + "External id": 981882,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938741735.575, "dur": 3.359, + "args": { + "External id": 981883,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938741752.305, "dur": 1.135, + "args": { + "External id": 981884,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938741767.339, "dur": 1.202, + "args": { + "External id": 981885,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938741779.841, "dur": 1.000, + "args": { + "External id": 981886,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938741792.863, "dur": 2.956, + "args": { + "External id": 981887,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938741807.194, "dur": 2.434, + "args": { + "External id": 981888,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938741821.137, "dur": 1.158, + "args": { + "External id": 981889,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938741925.609, "dur": 3377.861, + "args": { + "External id": 981890,"Record function id": 0, "Ev Idx": 4481 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2338708, "tid": 2379421, + "ts": 6345938741949.660, "dur": 1270.706, + "args": { + "External id": 981891,"Record function id": 0, "Ev Idx": 4482 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338708, "tid": 2379421, + "ts": 6345938741965.357, "dur": 438.579, + "args": { + "External id": 981892,"Record function id": 0, "Ev Idx": 4483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938742122.394, "dur": 6.355, + "args": { + "External id": 981893,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938742133.333, "dur": 0.673, + "args": { + "External id": 981894,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938742136.245, "dur": 3.467, + "args": { + "External id": 981895,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938742141.671, "dur": 0.830, + "args": { + "External id": 981896,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938742144.141, "dur": 0.921, + "args": { + "External id": 981897,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938742146.788, "dur": 1.261, + "args": { + "External id": 981898,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938742149.910, "dur": 2.235, + "args": { + "External id": 981899,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938742156.446, "dur": 0.900, + "args": { + "External id": 981900,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938742159.147, "dur": 1.094, + "args": { + "External id": 981901,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938742162.053, "dur": 0.591, + "args": { + "External id": 981902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938742185.693, "dur": 181.999, + "args": { + "External id": 981903,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938742204.289, "dur": 157.876, + "args": { + "External id": 981904,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938742227.071, "dur": 20.728, + "args": { + "External id": 981905,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938742254.338, "dur": 76.047, + "args": { + "External id": 981906,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938742257.665, "dur": 72.179, + "args": { + "External id": 981907,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742262.366, "dur": 6.384, + "args": { + "External id": 981908,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938742270.873, "dur": 58.277, + "args": { + "External id": 981909,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4500 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2338708, "tid": 2379421, + "ts": 6345938742492.617, "dur": 718.317, + "args": { + "External id": 981910,"Record function id": 0, "Ev Idx": 4501 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338708, "tid": 2379421, + "ts": 6345938742512.684, "dur": 683.722, + "args": { + "External id": 981911,"Record function id": 0, "Ev Idx": 4502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938742574.981, "dur": 6.105, + "args": { + "External id": 981912,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938742599.265, "dur": 39.100, + "args": { + "External id": 981913,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742604.789, "dur": 3.345, + "args": { + "External id": 981914,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742610.205, "dur": 0.613, + "args": { + "External id": 981915,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742612.564, "dur": 0.372, + "args": { + "External id": 981916,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742616.638, "dur": 0.428, + "args": { + "External id": 981917,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742618.618, "dur": 0.651, + "args": { + "External id": 981918,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742621.242, "dur": 2.421, + "args": { + "External id": 981919,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742627.033, "dur": 0.578, + "args": { + "External id": 981920,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742629.128, "dur": 0.519, + "args": { + "External id": 981921,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742631.156, "dur": 2.214, + "args": { + "External id": 981922,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938742649.930, "dur": 48.556, + "args": { + "External id": 981923,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938742732.821, "dur": 126.884, + "args": { + "External id": 981924,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938742744.465, "dur": 4.226, + "args": { + "External id": 981925,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938742754.580, "dur": 11.805, + "args": { + "External id": 981926,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938742759.761, "dur": 6.175, + "args": { + "External id": 981927,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742763.705, "dur": 0.753, + "args": { + "External id": 981928,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938742774.722, "dur": 29.324, + "args": { + "External id": 981929,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742777.037, "dur": 0.633, + "args": { + "External id": 981930,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742779.909, "dur": 0.463, + "args": { + "External id": 981931,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742781.876, "dur": 2.932, + "args": { + "External id": 981932,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742786.491, "dur": 0.416, + "args": { + "External id": 981933,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742788.486, "dur": 0.483, + "args": { + "External id": 981934,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742792.010, "dur": 0.527, + "args": { + "External id": 981935,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742793.836, "dur": 0.529, + "args": { + "External id": 981936,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742796.123, "dur": 0.562, + "args": { + "External id": 981937,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938742799.201, "dur": 0.629, + "args": { + "External id": 981938,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938742814.750, "dur": 36.397, + "args": { + "External id": 981939,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938742909.978, "dur": 195.815, + "args": { + "External id": 981940,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938742942.448, "dur": 158.361, + "args": { + "External id": 981941,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4532, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938742952.323, "dur": 141.636, + "args": { + "External id": 981942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938743130.172, "dur": 3.040, + "args": { + "External id": 981943,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4534, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938743228.310, "dur": 2053.138, + "args": { + "External id": 981944,"Sequence number": 10552258, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4535 + } + }, + { + "ph": "f", "id": 208, "pid": 2338708, "tid": 2379421, "ts": 6345938743228.310, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938743356.057, "dur": 121.468, + "args": { + "External id": 981945,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938743525.370, "dur": 45.083, + "args": { + "External id": 981946,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938743604.263, "dur": 57.636, + "args": { + "External id": 981947,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938743677.602, "dur": 37.464, + "args": { + "External id": 981948,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938743722.580, "dur": 37.687, + "args": { + "External id": 981949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938743767.988, "dur": 32.354, + "args": { + "External id": 981950,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938743808.290, "dur": 32.126, + "args": { + "External id": 981951,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938743876.448, "dur": 27.163, + "args": { + "External id": 981952,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938743928.044, "dur": 32.427, + "args": { + "External id": 981953,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938743985.268, "dur": 40.246, + "args": { + "External id": 981954,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938744046.981, "dur": 61.745, + "args": { + "External id": 981955,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938744121.707, "dur": 50.719, + "args": { + "External id": 981956,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938744176.858, "dur": 40.346, + "args": { + "External id": 981957,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938744257.658, "dur": 309.785, + "args": { + "External id": 981958,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938744349.509, "dur": 6.557, + "args": { + "External id": 981959,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938744358.794, "dur": 3.075, + "args": { + "External id": 981960,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938744363.616, "dur": 2.387, + "args": { + "External id": 981961,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938744367.536, "dur": 2.548, + "args": { + "External id": 981962,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938744421.127, "dur": 11.252, + "args": { + "External id": 981963,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938744429.055, "dur": 3.122, + "args": { + "External id": 981964,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938744434.326, "dur": 37.324, + "args": { + "External id": 981965,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938744441.371, "dur": 3.502, + "args": { + "External id": 981966,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938744473.586, "dur": 2.160, + "args": { + "External id": 981967,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938744474.712, "dur": 0.936, + "args": { + "External id": 981968,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938744476.864, "dur": 22.782, + "args": { + "External id": 981969,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938744484.813, "dur": 0.630, + "args": { + "External id": 981970,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938744615.691, "dur": 41.659, + "args": { + "External id": 981971,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938744678.725, "dur": 23.390, + "args": { + "External id": 981972,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938744711.014, "dur": 48.008, + "args": { + "External id": 981973,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938744766.475, "dur": 43.238, + "args": { + "External id": 981974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938744821.045, "dur": 26.656, + "args": { + "External id": 981975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938744854.164, "dur": 35.002, + "args": { + "External id": 981976,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938744897.393, "dur": 32.113, + "args": { + "External id": 981977,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938744938.258, "dur": 33.456, + "args": { + "External id": 981978,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938744995.451, "dur": 47.307, + "args": { + "External id": 981979,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938745104.795, "dur": 30.389, + "args": { + "External id": 981980,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938745156.128, "dur": 18.870, + "args": { + "External id": 981981,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938745199.050, "dur": 15.281, + "args": { + "External id": 981982,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938745229.354, "dur": 18.178, + "args": { + "External id": 981983,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745329.322, "dur": 17.878, + "args": { + "External id": 981984,"Record function id": 0, "Ev Idx": 4575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745333.598, "dur": 12.520, + "args": { + "External id": 981985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745338.764, "dur": 5.934, + "args": { + "External id": 981986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745340.421, "dur": 4.160, + "args": { + "External id": 981987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745352.124, "dur": 6.008, + "args": { + "External id": 981988,"Record function id": 0, "Ev Idx": 4579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745354.322, "dur": 3.237, + "args": { + "External id": 981989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745354.967, "dur": 2.036, + "args": { + "External id": 981990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745355.755, "dur": 1.145, + "args": { + "External id": 981991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745362.325, "dur": 7.794, + "args": { + "External id": 981992,"Record function id": 0, "Ev Idx": 4583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745363.809, "dur": 5.786, + "args": { + "External id": 981993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745364.956, "dur": 4.156, + "args": { + "External id": 981994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745365.747, "dur": 3.235, + "args": { + "External id": 981995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745373.966, "dur": 4.768, + "args": { + "External id": 981996,"Record function id": 0, "Ev Idx": 4587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745375.368, "dur": 2.866, + "args": { + "External id": 981997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745376.273, "dur": 1.466, + "args": { + "External id": 981998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745376.618, "dur": 1.037, + "args": { + "External id": 981999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745382.557, "dur": 4.875, + "args": { + "External id": 982000,"Record function id": 0, "Ev Idx": 4591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745384.018, "dur": 2.877, + "args": { + "External id": 982001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745384.833, "dur": 1.459, + "args": { + "External id": 982002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745385.562, "dur": 0.641, + "args": { + "External id": 982003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745391.172, "dur": 4.070, + "args": { + "External id": 982004,"Record function id": 0, "Ev Idx": 4595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745392.424, "dur": 2.316, + "args": { + "External id": 982005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745393.064, "dur": 1.200, + "args": { + "External id": 982006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745393.482, "dur": 0.705, + "args": { + "External id": 982007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745399.198, "dur": 6.897, + "args": { + "External id": 982008,"Record function id": 0, "Ev Idx": 4599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745400.673, "dur": 4.939, + "args": { + "External id": 982009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745401.531, "dur": 3.551, + "args": { + "External id": 982010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745404.354, "dur": 0.622, + "args": { + "External id": 982011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745409.966, "dur": 5.191, + "args": { + "External id": 982012,"Record function id": 0, "Ev Idx": 4603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745411.272, "dur": 3.401, + "args": { + "External id": 982013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745412.395, "dur": 1.771, + "args": { + "External id": 982014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745412.899, "dur": 1.159, + "args": { + "External id": 982015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745418.972, "dur": 33.485, + "args": { + "External id": 982016,"Record function id": 0, "Ev Idx": 4607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938745449.299, "dur": 2.608, + "args": { + "External id": 982017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745449.977, "dur": 1.374, + "args": { + "External id": 982018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938745450.627, "dur": 0.645, + "args": { + "External id": 982019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938745457.733, "dur": 65737.377, + "args": { + "External id": 982020,"Record function id": 0, "Sequence number": 10552257, "Fwd thread id": 1, "Ev Idx": 4611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938745459.918, "dur": 65725.399, + "args": { + "External id": 982021,"Sequence number": 10552257, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4612 + } + }, + { + "ph": "f", "id": 209, "pid": 2338708, "tid": 2379421, "ts": 6345938745459.918, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2338708, "tid": 2379421, + "ts": 6345938745495.147, "dur": 46.139, + "args": { + "External id": 982022,"Record function id": 0, "Ev Idx": 4613 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2338708, "tid": 2379421, + "ts": 6345938745550.681, "dur": 73.817, + "args": { + "External id": 982023,"Record function id": 0, "Ev Idx": 4614 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2338708, "tid": 2379421, + "ts": 6345938745632.429, "dur": 65541.973, + "args": { + "External id": 982024,"Record function id": 0, "Ev Idx": 4615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938745738.408, "dur": 8.805, + "args": { + "External id": 982025,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938745758.567, "dur": 7.435, + "args": { + "External id": 982026,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938745784.464, "dur": 64411.250, + "args": { + "External id": 982027,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938745800.040, "dur": 64381.006, + "args": { + "External id": 982028,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938745908.834, "dur": 18.750, + "args": { + "External id": 982029,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938745950.112, "dur": 64178.990, + "args": { + "External id": 982030,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938745953.541, "dur": 64174.387, + "args": { + "External id": 982031,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938745958.530, "dur": 10.793, + "args": { + "External id": 982032,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938745971.671, "dur": 64150.746, + "args": { + "External id": 982033,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938810316.717, "dur": 12.775, + "args": { + "External id": 982034,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938810320.671, "dur": 8.342, + "args": { + "External id": 982035,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938810361.603, "dur": 404.349, + "args": { + "External id": 982036,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938810398.113, "dur": 361.909, + "args": { + "External id": 982037,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4628, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938810412.198, "dur": 341.814, + "args": { + "External id": 982038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938810790.942, "dur": 2.578, + "args": { + "External id": 982039,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4630, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938810864.067, "dur": 7.718, + "args": { + "External id": 982040,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938810921.513, "dur": 1.810, + "args": { + "External id": 982041,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938810940.659, "dur": 3.436, + "args": { + "External id": 982042,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938810956.734, "dur": 0.986, + "args": { + "External id": 982043,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938810972.411, "dur": 0.909, + "args": { + "External id": 982044,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938810985.158, "dur": 0.876, + "args": { + "External id": 982045,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938810998.330, "dur": 3.083, + "args": { + "External id": 982046,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811039.289, "dur": 3.257, + "args": { + "External id": 982047,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811092.982, "dur": 2.053, + "args": { + "External id": 982048,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938811213.718, "dur": 3336.470, + "args": { + "External id": 982049,"Record function id": 0, "Ev Idx": 4640 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2338708, "tid": 2379421, + "ts": 6345938811239.895, "dur": 1224.333, + "args": { + "External id": 982050,"Record function id": 0, "Ev Idx": 4641 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338708, "tid": 2379421, + "ts": 6345938811258.169, "dur": 370.443, + "args": { + "External id": 982051,"Record function id": 0, "Ev Idx": 4642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938811357.765, "dur": 4.942, + "args": { + "External id": 982052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938811366.817, "dur": 0.993, + "args": { + "External id": 982053,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938811370.324, "dur": 3.581, + "args": { + "External id": 982054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938811376.173, "dur": 0.804, + "args": { + "External id": 982055,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938811378.807, "dur": 1.101, + "args": { + "External id": 982056,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938811381.589, "dur": 1.027, + "args": { + "External id": 982057,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938811384.572, "dur": 2.121, + "args": { + "External id": 982058,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938811390.750, "dur": 0.598, + "args": { + "External id": 982059,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938811393.261, "dur": 0.721, + "args": { + "External id": 982060,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938811395.660, "dur": 0.812, + "args": { + "External id": 982061,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938811417.918, "dur": 176.307, + "args": { + "External id": 982062,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938811435.843, "dur": 152.592, + "args": { + "External id": 982063,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938811459.574, "dur": 20.897, + "args": { + "External id": 982064,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938811484.801, "dur": 72.298, + "args": { + "External id": 982065,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938811487.529, "dur": 69.017, + "args": { + "External id": 982066,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811492.415, "dur": 5.978, + "args": { + "External id": 982067,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938811500.276, "dur": 55.519, + "args": { + "External id": 982068,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2338708, "tid": 2379421, + "ts": 6345938811714.812, "dur": 740.273, + "args": { + "External id": 982069,"Record function id": 0, "Ev Idx": 4660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338708, "tid": 2379421, + "ts": 6345938811734.275, "dur": 705.410, + "args": { + "External id": 982070,"Record function id": 0, "Ev Idx": 4661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938811796.698, "dur": 5.937, + "args": { + "External id": 982071,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938811820.489, "dur": 36.737, + "args": { + "External id": 982072,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811826.464, "dur": 3.049, + "args": { + "External id": 982073,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811831.633, "dur": 0.615, + "args": { + "External id": 982074,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811833.776, "dur": 0.386, + "args": { + "External id": 982075,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811837.090, "dur": 0.358, + "args": { + "External id": 982076,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811839.283, "dur": 0.426, + "args": { + "External id": 982077,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811841.602, "dur": 2.634, + "args": { + "External id": 982078,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811846.670, "dur": 0.517, + "args": { + "External id": 982079,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811849.139, "dur": 0.310, + "args": { + "External id": 982080,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811850.997, "dur": 1.607, + "args": { + "External id": 982081,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938811869.253, "dur": 50.937, + "args": { + "External id": 982082,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938811954.696, "dur": 204.691, + "args": { + "External id": 982083,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938811965.122, "dur": 4.752, + "args": { + "External id": 982084,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938811975.576, "dur": 11.402, + "args": { + "External id": 982085,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938811980.196, "dur": 6.333, + "args": { + "External id": 982086,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811984.297, "dur": 0.750, + "args": { + "External id": 982087,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938811995.182, "dur": 54.987, + "args": { + "External id": 982088,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938811998.221, "dur": 0.642, + "args": { + "External id": 982089,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938812000.966, "dur": 0.663, + "args": { + "External id": 982090,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938812003.093, "dur": 3.766, + "args": { + "External id": 982091,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938812030.012, "dur": 0.555, + "args": { + "External id": 982092,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938812032.856, "dur": 0.537, + "args": { + "External id": 982093,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938812036.475, "dur": 0.363, + "args": { + "External id": 982094,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938812038.113, "dur": 0.836, + "args": { + "External id": 982095,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938812040.656, "dur": 0.616, + "args": { + "External id": 982096,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938812044.391, "dur": 0.921, + "args": { + "External id": 982097,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938812105.797, "dur": 43.666, + "args": { + "External id": 982098,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938812215.163, "dur": 142.653, + "args": { + "External id": 982099,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938812249.224, "dur": 104.305, + "args": { + "External id": 982100,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4691, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938812261.693, "dur": 86.396, + "args": { + "External id": 982101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938812376.814, "dur": 2.321, + "args": { + "External id": 982102,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4693, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938812473.210, "dur": 2052.886, + "args": { + "External id": 982103,"Sequence number": 10552256, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4694 + } + }, + { + "ph": "f", "id": 210, "pid": 2338708, "tid": 2379421, "ts": 6345938812473.210, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938812601.263, "dur": 117.663, + "args": { + "External id": 982104,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938812769.498, "dur": 47.306, + "args": { + "External id": 982105,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938812836.274, "dur": 58.124, + "args": { + "External id": 982106,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938812907.911, "dur": 36.419, + "args": { + "External id": 982107,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938812963.646, "dur": 38.511, + "args": { + "External id": 982108,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938813033.339, "dur": 80.496, + "args": { + "External id": 982109,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938813127.669, "dur": 37.470, + "args": { + "External id": 982110,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938813201.871, "dur": 28.123, + "args": { + "External id": 982111,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938813257.342, "dur": 31.247, + "args": { + "External id": 982112,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938813314.909, "dur": 26.358, + "args": { + "External id": 982113,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938813356.249, "dur": 18.294, + "args": { + "External id": 982114,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938813384.020, "dur": 42.497, + "args": { + "External id": 982115,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938813430.549, "dur": 36.316, + "args": { + "External id": 982116,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938813501.124, "dur": 295.864, + "args": { + "External id": 982117,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938813593.018, "dur": 7.580, + "args": { + "External id": 982118,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938813603.151, "dur": 3.182, + "args": { + "External id": 982119,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938813608.014, "dur": 4.639, + "args": { + "External id": 982120,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938813614.131, "dur": 2.030, + "args": { + "External id": 982121,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938813661.608, "dur": 8.299, + "args": { + "External id": 982122,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938813666.285, "dur": 3.451, + "args": { + "External id": 982123,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938813672.369, "dur": 38.340, + "args": { + "External id": 982124,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938813678.972, "dur": 3.916, + "args": { + "External id": 982125,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938813712.345, "dur": 2.122, + "args": { + "External id": 982126,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938813713.411, "dur": 0.974, + "args": { + "External id": 982127,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938813715.428, "dur": 20.367, + "args": { + "External id": 982128,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938813720.438, "dur": 0.852, + "args": { + "External id": 982129,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938813849.077, "dur": 35.743, + "args": { + "External id": 982130,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938813906.406, "dur": 19.204, + "args": { + "External id": 982131,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938813934.367, "dur": 52.497, + "args": { + "External id": 982132,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938813994.869, "dur": 109.296, + "args": { + "External id": 982133,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938814121.876, "dur": 31.606, + "args": { + "External id": 982134,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938814160.054, "dur": 36.676, + "args": { + "External id": 982135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938814204.514, "dur": 32.710, + "args": { + "External id": 982136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938814245.460, "dur": 33.501, + "args": { + "External id": 982137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938814305.029, "dur": 29.400, + "args": { + "External id": 982138,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938814355.812, "dur": 28.266, + "args": { + "External id": 982139,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938814403.392, "dur": 18.869, + "args": { + "External id": 982140,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938814444.065, "dur": 15.028, + "args": { + "External id": 982141,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938814474.575, "dur": 16.880, + "args": { + "External id": 982142,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814576.385, "dur": 17.484, + "args": { + "External id": 982143,"Record function id": 0, "Ev Idx": 4734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814580.163, "dur": 12.801, + "args": { + "External id": 982144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814584.950, "dur": 6.748, + "args": { + "External id": 982145,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814586.797, "dur": 4.793, + "args": { + "External id": 982146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814598.633, "dur": 5.979, + "args": { + "External id": 982147,"Record function id": 0, "Ev Idx": 4738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814600.496, "dur": 3.604, + "args": { + "External id": 982148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814601.455, "dur": 2.033, + "args": { + "External id": 982149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814602.291, "dur": 1.099, + "args": { + "External id": 982150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814608.466, "dur": 8.397, + "args": { + "External id": 982151,"Record function id": 0, "Ev Idx": 4742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814610.342, "dur": 5.850, + "args": { + "External id": 982152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814611.111, "dur": 4.533, + "args": { + "External id": 982153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814612.113, "dur": 3.404, + "args": { + "External id": 982154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814620.710, "dur": 5.167, + "args": { + "External id": 982155,"Record function id": 0, "Ev Idx": 4746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814622.329, "dur": 3.055, + "args": { + "External id": 982156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814623.172, "dur": 1.737, + "args": { + "External id": 982157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814623.933, "dur": 0.905, + "args": { + "External id": 982158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814629.543, "dur": 5.165, + "args": { + "External id": 982159,"Record function id": 0, "Ev Idx": 4750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814631.222, "dur": 2.991, + "args": { + "External id": 982160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814632.159, "dur": 1.511, + "args": { + "External id": 982161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814632.667, "dur": 0.917, + "args": { + "External id": 982162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814638.395, "dur": 4.774, + "args": { + "External id": 982163,"Record function id": 0, "Ev Idx": 4754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814639.883, "dur": 2.808, + "args": { + "External id": 982164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814640.603, "dur": 1.553, + "args": { + "External id": 982165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814641.095, "dur": 0.969, + "args": { + "External id": 982166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814646.938, "dur": 6.784, + "args": { + "External id": 982167,"Record function id": 0, "Ev Idx": 4758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814648.336, "dur": 4.928, + "args": { + "External id": 982168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814649.146, "dur": 3.619, + "args": { + "External id": 982169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814651.780, "dur": 0.879, + "args": { + "External id": 982170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814657.758, "dur": 4.820, + "args": { + "External id": 982171,"Record function id": 0, "Ev Idx": 4762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814659.615, "dur": 2.489, + "args": { + "External id": 982172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814660.234, "dur": 1.356, + "args": { + "External id": 982173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814660.737, "dur": 0.753, + "args": { + "External id": 982174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814666.484, "dur": 4.558, + "args": { + "External id": 982175,"Record function id": 0, "Ev Idx": 4766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938814667.795, "dur": 2.757, + "args": { + "External id": 982176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814668.487, "dur": 1.550, + "args": { + "External id": 982177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938814669.161, "dur": 0.789, + "args": { + "External id": 982178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938814676.359, "dur": 64727.330, + "args": { + "External id": 982179,"Record function id": 0, "Sequence number": 10552255, "Fwd thread id": 1, "Ev Idx": 4770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938814678.398, "dur": 64714.940, + "args": { + "External id": 982180,"Sequence number": 10552255, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4771 + } + }, + { + "ph": "f", "id": 211, "pid": 2338708, "tid": 2379421, "ts": 6345938814678.398, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2338708, "tid": 2379421, + "ts": 6345938814712.424, "dur": 45.489, + "args": { + "External id": 982181,"Record function id": 0, "Ev Idx": 4772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2338708, "tid": 2379421, + "ts": 6345938814767.905, "dur": 74.706, + "args": { + "External id": 982182,"Record function id": 0, "Ev Idx": 4773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2338708, "tid": 2379421, + "ts": 6345938814851.165, "dur": 64531.606, + "args": { + "External id": 982183,"Record function id": 0, "Ev Idx": 4774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938814955.030, "dur": 8.046, + "args": { + "External id": 982184,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938814974.462, "dur": 7.264, + "args": { + "External id": 982185,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938815001.641, "dur": 63412.267, + "args": { + "External id": 982186,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938815041.467, "dur": 63357.722, + "args": { + "External id": 982187,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938815183.580, "dur": 21.493, + "args": { + "External id": 982188,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938815228.413, "dur": 63118.773, + "args": { + "External id": 982189,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938815231.824, "dur": 63114.220, + "args": { + "External id": 982190,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938815237.382, "dur": 14.524, + "args": { + "External id": 982191,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938815254.658, "dur": 63085.537, + "args": { + "External id": 982192,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938878534.763, "dur": 13.453, + "args": { + "External id": 982193,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938878538.513, "dur": 9.168, + "args": { + "External id": 982194,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938878580.605, "dur": 403.255, + "args": { + "External id": 982195,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938878618.275, "dur": 359.682, + "args": { + "External id": 982196,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4787, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938878632.694, "dur": 338.909, + "args": { + "External id": 982197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938879028.510, "dur": 3.960, + "args": { + "External id": 982198,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4789, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938879144.627, "dur": 8.578, + "args": { + "External id": 982199,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938879201.674, "dur": 1.779, + "args": { + "External id": 982200,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938879221.156, "dur": 4.213, + "args": { + "External id": 982201,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938879238.276, "dur": 1.273, + "args": { + "External id": 982202,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938879255.540, "dur": 0.854, + "args": { + "External id": 982203,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938879269.447, "dur": 0.982, + "args": { + "External id": 982204,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938879282.365, "dur": 3.234, + "args": { + "External id": 982205,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938879296.652, "dur": 2.670, + "args": { + "External id": 982206,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938879311.062, "dur": 0.953, + "args": { + "External id": 982207,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938879422.131, "dur": 3367.763, + "args": { + "External id": 982208,"Record function id": 0, "Ev Idx": 4799 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2338708, "tid": 2379421, + "ts": 6345938879447.124, "dur": 1221.129, + "args": { + "External id": 982209,"Record function id": 0, "Ev Idx": 4800 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338708, "tid": 2379421, + "ts": 6345938879463.456, "dur": 373.798, + "args": { + "External id": 982210,"Record function id": 0, "Ev Idx": 4801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938879562.016, "dur": 4.467, + "args": { + "External id": 982211,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938879570.637, "dur": 1.135, + "args": { + "External id": 982212,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938879573.750, "dur": 3.708, + "args": { + "External id": 982213,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938879579.513, "dur": 1.434, + "args": { + "External id": 982214,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938879582.796, "dur": 0.987, + "args": { + "External id": 982215,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938879585.696, "dur": 0.834, + "args": { + "External id": 982216,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938879588.172, "dur": 2.227, + "args": { + "External id": 982217,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938879593.846, "dur": 0.821, + "args": { + "External id": 982218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938879596.563, "dur": 0.667, + "args": { + "External id": 982219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938879599.040, "dur": 0.610, + "args": { + "External id": 982220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938879623.506, "dur": 181.905, + "args": { + "External id": 982221,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938879641.793, "dur": 157.690, + "args": { + "External id": 982222,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938879666.598, "dur": 19.482, + "args": { + "External id": 982223,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938879690.200, "dur": 76.920, + "args": { + "External id": 982224,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938879692.924, "dur": 73.842, + "args": { + "External id": 982225,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938879697.845, "dur": 7.511, + "args": { + "External id": 982226,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938879707.377, "dur": 58.654, + "args": { + "External id": 982227,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4818 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2338708, "tid": 2379421, + "ts": 6345938879923.529, "dur": 734.963, + "args": { + "External id": 982228,"Record function id": 0, "Ev Idx": 4819 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338708, "tid": 2379421, + "ts": 6345938879943.823, "dur": 701.517, + "args": { + "External id": 982229,"Record function id": 0, "Ev Idx": 4820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938880003.151, "dur": 27.815, + "args": { + "External id": 982230,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938880051.471, "dur": 78.820, + "args": { + "External id": 982231,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880096.649, "dur": 3.018, + "args": { + "External id": 982232,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880102.396, "dur": 0.569, + "args": { + "External id": 982233,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880104.888, "dur": 0.563, + "args": { + "External id": 982234,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880108.154, "dur": 0.373, + "args": { + "External id": 982235,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880110.092, "dur": 0.427, + "args": { + "External id": 982236,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880112.459, "dur": 3.024, + "args": { + "External id": 982237,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880118.576, "dur": 0.828, + "args": { + "External id": 982238,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880121.085, "dur": 0.727, + "args": { + "External id": 982239,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880123.380, "dur": 1.625, + "args": { + "External id": 982240,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938880143.656, "dur": 55.199, + "args": { + "External id": 982241,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938880238.881, "dur": 133.527, + "args": { + "External id": 982242,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938880251.308, "dur": 6.162, + "args": { + "External id": 982243,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938880263.462, "dur": 12.173, + "args": { + "External id": 982244,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938880268.745, "dur": 6.413, + "args": { + "External id": 982245,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880272.927, "dur": 0.860, + "args": { + "External id": 982246,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938880284.642, "dur": 31.080, + "args": { + "External id": 982247,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880287.383, "dur": 0.700, + "args": { + "External id": 982248,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880290.134, "dur": 0.697, + "args": { + "External id": 982249,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880292.525, "dur": 3.555, + "args": { + "External id": 982250,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880297.980, "dur": 0.513, + "args": { + "External id": 982251,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880300.604, "dur": 0.273, + "args": { + "External id": 982252,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880303.707, "dur": 0.382, + "args": { + "External id": 982253,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880305.861, "dur": 0.319, + "args": { + "External id": 982254,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880307.882, "dur": 0.450, + "args": { + "External id": 982255,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938880311.428, "dur": 0.381, + "args": { + "External id": 982256,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938880328.522, "dur": 34.754, + "args": { + "External id": 982257,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938880427.559, "dur": 139.175, + "args": { + "External id": 982258,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 4849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938880461.686, "dur": 100.807, + "args": { + "External id": 982259,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4850, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938880474.138, "dur": 83.522, + "args": { + "External id": 982260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 4851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938880587.358, "dur": 2.427, + "args": { + "External id": 982261,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4852, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938880676.512, "dur": 2091.194, + "args": { + "External id": 982262,"Sequence number": 10552254, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4853 + } + }, + { + "ph": "f", "id": 212, "pid": 2338708, "tid": 2379421, "ts": 6345938880676.512, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938880799.881, "dur": 115.629, + "args": { + "External id": 982263,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 4854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938880984.254, "dur": 107.952, + "args": { + "External id": 982264,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 4855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938881118.492, "dur": 69.088, + "args": { + "External id": 982265,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 4856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938881203.501, "dur": 38.216, + "args": { + "External id": 982266,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938881249.542, "dur": 37.888, + "args": { + "External id": 982267,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938881294.594, "dur": 32.211, + "args": { + "External id": 982268,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 4859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938881336.965, "dur": 33.023, + "args": { + "External id": 982269,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 4860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938881408.659, "dur": 27.640, + "args": { + "External id": 982270,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 4861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938881460.892, "dur": 33.196, + "args": { + "External id": 982271,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938881520.732, "dur": 23.970, + "args": { + "External id": 982272,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938881560.857, "dur": 17.915, + "args": { + "External id": 982273,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938881587.296, "dur": 42.735, + "args": { + "External id": 982274,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938881634.341, "dur": 37.520, + "args": { + "External id": 982275,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938881706.942, "dur": 330.749, + "args": { + "External id": 982276,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938881796.702, "dur": 6.990, + "args": { + "External id": 982277,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938881806.377, "dur": 3.385, + "args": { + "External id": 982278,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938881811.623, "dur": 3.770, + "args": { + "External id": 982279,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938881816.936, "dur": 3.727, + "args": { + "External id": 982280,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938881869.935, "dur": 7.515, + "args": { + "External id": 982281,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938881874.245, "dur": 2.970, + "args": { + "External id": 982282,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938881879.761, "dur": 39.989, + "args": { + "External id": 982283,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938881886.177, "dur": 4.691, + "args": { + "External id": 982284,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938881921.646, "dur": 2.260, + "args": { + "External id": 982285,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938881923.002, "dur": 0.798, + "args": { + "External id": 982286,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 4877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938881925.059, "dur": 32.538, + "args": { + "External id": 982287,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 4878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938881927.287, "dur": 0.657, + "args": { + "External id": 982288,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 4879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938882136.302, "dur": 41.622, + "args": { + "External id": 982289,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938882201.177, "dur": 19.568, + "args": { + "External id": 982290,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938882230.209, "dur": 62.433, + "args": { + "External id": 982291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938882300.845, "dur": 46.751, + "args": { + "External id": 982292,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938882359.443, "dur": 27.168, + "args": { + "External id": 982293,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 4884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938882392.568, "dur": 36.164, + "args": { + "External id": 982294,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 4885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938882437.157, "dur": 33.117, + "args": { + "External id": 982295,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 4886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938882477.575, "dur": 35.086, + "args": { + "External id": 982296,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 4887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938882537.749, "dur": 28.090, + "args": { + "External id": 982297,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 4888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938882584.892, "dur": 32.788, + "args": { + "External id": 982298,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938882637.526, "dur": 21.335, + "args": { + "External id": 982299,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 4890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938882680.194, "dur": 17.452, + "args": { + "External id": 982300,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 4891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938882714.526, "dur": 18.251, + "args": { + "External id": 982301,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 4892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882815.243, "dur": 16.881, + "args": { + "External id": 982302,"Record function id": 0, "Ev Idx": 4893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882819.032, "dur": 12.189, + "args": { + "External id": 982303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882823.747, "dur": 6.343, + "args": { + "External id": 982304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882825.439, "dur": 4.538, + "args": { + "External id": 982305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882836.969, "dur": 5.501, + "args": { + "External id": 982306,"Record function id": 0, "Ev Idx": 4897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882838.893, "dur": 2.978, + "args": { + "External id": 982307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882839.729, "dur": 1.548, + "args": { + "External id": 982308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882840.280, "dur": 0.872, + "args": { + "External id": 982309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882846.466, "dur": 7.747, + "args": { + "External id": 982310,"Record function id": 0, "Ev Idx": 4901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882848.060, "dur": 5.619, + "args": { + "External id": 982311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882848.856, "dur": 4.342, + "args": { + "External id": 982312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882849.606, "dur": 3.480, + "args": { + "External id": 982313,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 4904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882858.079, "dur": 5.003, + "args": { + "External id": 982314,"Record function id": 0, "Ev Idx": 4905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882860.031, "dur": 2.555, + "args": { + "External id": 982315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882860.632, "dur": 1.438, + "args": { + "External id": 982316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882860.973, "dur": 1.019, + "args": { + "External id": 982317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 4908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882866.856, "dur": 5.690, + "args": { + "External id": 982318,"Record function id": 0, "Ev Idx": 4909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882868.561, "dur": 3.488, + "args": { + "External id": 982319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882869.289, "dur": 2.211, + "args": { + "External id": 982320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882870.141, "dur": 1.270, + "args": { + "External id": 982321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882876.420, "dur": 5.313, + "args": { + "External id": 982322,"Record function id": 0, "Ev Idx": 4913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882877.975, "dur": 3.280, + "args": { + "External id": 982323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882878.817, "dur": 1.894, + "args": { + "External id": 982324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882879.660, "dur": 0.965, + "args": { + "External id": 982325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 4916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882885.667, "dur": 4.706, + "args": { + "External id": 982326,"Record function id": 0, "Ev Idx": 4917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882887.357, "dur": 2.493, + "args": { + "External id": 982327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882888.059, "dur": 1.295, + "args": { + "External id": 982328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882888.522, "dur": 0.746, + "args": { + "External id": 982329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882894.046, "dur": 6.664, + "args": { + "External id": 982330,"Record function id": 0, "Ev Idx": 4921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882895.536, "dur": 4.702, + "args": { + "External id": 982331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882896.097, "dur": 3.580, + "args": { + "External id": 982332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882898.780, "dur": 0.807, + "args": { + "External id": 982333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 4924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882904.372, "dur": 4.428, + "args": { + "External id": 982334,"Record function id": 0, "Ev Idx": 4925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938882905.955, "dur": 2.342, + "args": { + "External id": 982335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882906.552, "dur": 1.220, + "args": { + "External id": 982336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938882906.913, "dur": 0.769, + "args": { + "External id": 982337,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 4928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938882913.727, "dur": 60082.068, + "args": { + "External id": 982338,"Record function id": 0, "Sequence number": 10552253, "Fwd thread id": 1, "Ev Idx": 4929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938882915.627, "dur": 60069.853, + "args": { + "External id": 982339,"Sequence number": 10552253, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 4930 + } + }, + { + "ph": "f", "id": 213, "pid": 2338708, "tid": 2379421, "ts": 6345938882915.627, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2338708, "tid": 2379421, + "ts": 6345938882947.797, "dur": 47.304, + "args": { + "External id": 982340,"Record function id": 0, "Ev Idx": 4931 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2338708, "tid": 2379421, + "ts": 6345938883004.628, "dur": 144.619, + "args": { + "External id": 982341,"Record function id": 0, "Ev Idx": 4932 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2338708, "tid": 2379421, + "ts": 6345938883160.103, "dur": 59816.454, + "args": { + "External id": 982342,"Record function id": 0, "Ev Idx": 4933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938883266.823, "dur": 8.724, + "args": { + "External id": 982343,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938883287.461, "dur": 7.875, + "args": { + "External id": 982344,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938883314.133, "dur": 58725.997, + "args": { + "External id": 982345,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938883329.914, "dur": 58695.021, + "args": { + "External id": 982346,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 4937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938883429.134, "dur": 19.489, + "args": { + "External id": 982347,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938883471.576, "dur": 58489.124, + "args": { + "External id": 982348,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 4939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938883477.837, "dur": 58481.688, + "args": { + "External id": 982349,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 4940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938883484.625, "dur": 10.482, + "args": { + "External id": 982350,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938883497.442, "dur": 58456.345, + "args": { + "External id": 982351,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 4942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938942190.042, "dur": 13.869, + "args": { + "External id": 982352,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 4943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938942193.920, "dur": 9.341, + "args": { + "External id": 982353,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938942237.841, "dur": 411.249, + "args": { + "External id": 982354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 4945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938942274.861, "dur": 368.565, + "args": { + "External id": 982355,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4946, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938942289.624, "dur": 347.872, + "args": { + "External id": 982356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 4947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938942674.016, "dur": 2.879, + "args": { + "External id": 982357,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4948, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938942743.940, "dur": 7.444, + "args": { + "External id": 982358,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938942801.287, "dur": 1.694, + "args": { + "External id": 982359,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938942820.568, "dur": 3.715, + "args": { + "External id": 982360,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938942837.558, "dur": 0.963, + "args": { + "External id": 982361,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938942851.687, "dur": 0.889, + "args": { + "External id": 982362,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938942863.864, "dur": 0.886, + "args": { + "External id": 982363,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938942879.317, "dur": 3.560, + "args": { + "External id": 982364,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938942894.763, "dur": 2.360, + "args": { + "External id": 982365,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938942908.231, "dur": 0.835, + "args": { + "External id": 982366,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938943032.008, "dur": 3339.246, + "args": { + "External id": 982367,"Record function id": 0, "Ev Idx": 4958 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2338708, "tid": 2379421, + "ts": 6345938943093.376, "dur": 1232.897, + "args": { + "External id": 982368,"Record function id": 0, "Ev Idx": 4959 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338708, "tid": 2379421, + "ts": 6345938943112.071, "dur": 388.421, + "args": { + "External id": 982369,"Record function id": 0, "Ev Idx": 4960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938943213.525, "dur": 6.003, + "args": { + "External id": 982370,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 4961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938943223.424, "dur": 1.008, + "args": { + "External id": 982371,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938943226.776, "dur": 4.468, + "args": { + "External id": 982372,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938943235.385, "dur": 0.849, + "args": { + "External id": 982373,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938943238.178, "dur": 0.877, + "args": { + "External id": 982374,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938943241.078, "dur": 0.833, + "args": { + "External id": 982375,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 4966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938943243.878, "dur": 2.036, + "args": { + "External id": 982376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938943249.799, "dur": 0.852, + "args": { + "External id": 982377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938943252.669, "dur": 0.732, + "args": { + "External id": 982378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938943255.197, "dur": 0.632, + "args": { + "External id": 982379,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 4970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938943277.761, "dur": 186.211, + "args": { + "External id": 982380,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938943296.437, "dur": 161.803, + "args": { + "External id": 982381,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 4972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938943318.349, "dur": 21.119, + "args": { + "External id": 982382,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938943345.743, "dur": 77.055, + "args": { + "External id": 982383,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 4974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938943348.813, "dur": 73.527, + "args": { + "External id": 982384,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 4975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943353.611, "dur": 6.950, + "args": { + "External id": 982385,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938943362.546, "dur": 59.119, + "args": { + "External id": 982386,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 4977 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2338708, "tid": 2379421, + "ts": 6345938943590.137, "dur": 727.109, + "args": { + "External id": 982387,"Record function id": 0, "Ev Idx": 4978 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338708, "tid": 2379421, + "ts": 6345938943608.277, "dur": 693.692, + "args": { + "External id": 982388,"Record function id": 0, "Ev Idx": 4979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938943669.073, "dur": 5.853, + "args": { + "External id": 982389,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938943692.974, "dur": 37.659, + "args": { + "External id": 982390,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943698.646, "dur": 3.333, + "args": { + "External id": 982391,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943704.289, "dur": 0.547, + "args": { + "External id": 982392,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943706.721, "dur": 0.559, + "args": { + "External id": 982393,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943710.773, "dur": 0.550, + "args": { + "External id": 982394,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943712.930, "dur": 0.453, + "args": { + "External id": 982395,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943715.196, "dur": 2.620, + "args": { + "External id": 982396,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943720.730, "dur": 0.576, + "args": { + "External id": 982397,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943722.757, "dur": 0.542, + "args": { + "External id": 982398,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943724.471, "dur": 1.531, + "args": { + "External id": 982399,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938943742.726, "dur": 51.048, + "args": { + "External id": 982400,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 4991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345938943829.548, "dur": 129.503, + "args": { + "External id": 982401,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 4992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938943840.642, "dur": 3.915, + "args": { + "External id": 982402,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345938943850.349, "dur": 11.717, + "args": { + "External id": 982403,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345938943855.153, "dur": 6.429, + "args": { + "External id": 982404,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 4995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943859.386, "dur": 0.829, + "args": { + "External id": 982405,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 4996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345938943870.786, "dur": 28.880, + "args": { + "External id": 982406,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 4997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943873.479, "dur": 0.492, + "args": { + "External id": 982407,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943876.458, "dur": 0.518, + "args": { + "External id": 982408,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 4999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943878.401, "dur": 2.800, + "args": { + "External id": 982409,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943882.785, "dur": 0.462, + "args": { + "External id": 982410,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943884.660, "dur": 0.308, + "args": { + "External id": 982411,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943888.382, "dur": 0.287, + "args": { + "External id": 982412,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943890.111, "dur": 0.505, + "args": { + "External id": 982413,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943892.450, "dur": 0.489, + "args": { + "External id": 982414,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938943895.148, "dur": 0.387, + "args": { + "External id": 982415,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938943913.344, "dur": 37.147, + "args": { + "External id": 982416,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345938944029.681, "dur": 182.656, + "args": { + "External id": 982417,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938944101.464, "dur": 106.201, + "args": { + "External id": 982418,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5009, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345938944114.288, "dur": 88.214, + "args": { + "External id": 982419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345938944235.475, "dur": 2.503, + "args": { + "External id": 982420,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5011, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938944335.294, "dur": 2012.478, + "args": { + "External id": 982421,"Sequence number": 10552252, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5012 + } + }, + { + "ph": "f", "id": 214, "pid": 2338708, "tid": 2379421, "ts": 6345938944335.294, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938944464.020, "dur": 119.005, + "args": { + "External id": 982422,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938944632.542, "dur": 43.470, + "args": { + "External id": 982423,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345938944694.561, "dur": 56.295, + "args": { + "External id": 982424,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938944764.922, "dur": 35.891, + "args": { + "External id": 982425,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938944808.197, "dur": 37.335, + "args": { + "External id": 982426,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938944852.621, "dur": 31.748, + "args": { + "External id": 982427,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938944892.292, "dur": 34.179, + "args": { + "External id": 982428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938944957.956, "dur": 26.589, + "args": { + "External id": 982429,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345938945035.982, "dur": 76.148, + "args": { + "External id": 982430,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938945146.895, "dur": 25.014, + "args": { + "External id": 982431,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938945190.430, "dur": 18.533, + "args": { + "External id": 982432,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938945217.523, "dur": 49.754, + "args": { + "External id": 982433,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938945271.413, "dur": 38.759, + "args": { + "External id": 982434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345938945344.001, "dur": 303.608, + "args": { + "External id": 982435,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938945434.816, "dur": 6.804, + "args": { + "External id": 982436,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938945444.340, "dur": 2.692, + "args": { + "External id": 982437,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938945448.697, "dur": 3.701, + "args": { + "External id": 982438,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938945453.907, "dur": 1.948, + "args": { + "External id": 982439,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938945504.453, "dur": 5.934, + "args": { + "External id": 982440,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938945506.827, "dur": 3.391, + "args": { + "External id": 982441,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938945519.222, "dur": 37.400, + "args": { + "External id": 982442,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938945527.080, "dur": 3.231, + "args": { + "External id": 982443,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345938945558.781, "dur": 2.138, + "args": { + "External id": 982444,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938945559.698, "dur": 1.086, + "args": { + "External id": 982445,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345938945562.212, "dur": 21.473, + "args": { + "External id": 982446,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938945566.398, "dur": 0.676, + "args": { + "External id": 982447,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345938945695.049, "dur": 34.176, + "args": { + "External id": 982448,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938945755.847, "dur": 18.523, + "args": { + "External id": 982449,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938945783.080, "dur": 49.714, + "args": { + "External id": 982450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938945840.000, "dur": 42.652, + "args": { + "External id": 982451,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938945894.078, "dur": 23.641, + "args": { + "External id": 982452,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938945924.199, "dur": 34.913, + "args": { + "External id": 982453,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938945967.200, "dur": 31.394, + "args": { + "External id": 982454,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345938946005.660, "dur": 96.533, + "args": { + "External id": 982455,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345938946131.036, "dur": 26.569, + "args": { + "External id": 982456,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938946179.382, "dur": 25.232, + "args": { + "External id": 982457,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345938946225.023, "dur": 18.776, + "args": { + "External id": 982458,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345938946263.501, "dur": 14.337, + "args": { + "External id": 982459,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345938946296.138, "dur": 18.063, + "args": { + "External id": 982460,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946397.905, "dur": 17.719, + "args": { + "External id": 982461,"Record function id": 0, "Ev Idx": 5052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946401.811, "dur": 12.790, + "args": { + "External id": 982462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946406.848, "dur": 6.851, + "args": { + "External id": 982463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946408.811, "dur": 4.792, + "args": { + "External id": 982464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946420.598, "dur": 5.542, + "args": { + "External id": 982465,"Record function id": 0, "Ev Idx": 5056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946422.487, "dur": 3.069, + "args": { + "External id": 982466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946423.417, "dur": 1.618, + "args": { + "External id": 982467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946423.971, "dur": 0.969, + "args": { + "External id": 982468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946430.009, "dur": 7.839, + "args": { + "External id": 982469,"Record function id": 0, "Ev Idx": 5060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946431.568, "dur": 5.712, + "args": { + "External id": 982470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946432.348, "dur": 4.410, + "args": { + "External id": 982471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946433.461, "dur": 3.175, + "args": { + "External id": 982472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946441.632, "dur": 4.762, + "args": { + "External id": 982473,"Record function id": 0, "Ev Idx": 5064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946443.128, "dur": 2.721, + "args": { + "External id": 982474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946443.896, "dur": 1.465, + "args": { + "External id": 982475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946444.428, "dur": 0.793, + "args": { + "External id": 982476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946450.180, "dur": 4.448, + "args": { + "External id": 982477,"Record function id": 0, "Ev Idx": 5068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946451.514, "dur": 2.569, + "args": { + "External id": 982478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946452.472, "dur": 1.135, + "args": { + "External id": 982479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946452.774, "dur": 0.746, + "args": { + "External id": 982480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946458.321, "dur": 4.970, + "args": { + "External id": 982481,"Record function id": 0, "Ev Idx": 5072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946459.980, "dur": 2.783, + "args": { + "External id": 982482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946460.687, "dur": 1.555, + "args": { + "External id": 982483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946461.426, "dur": 0.728, + "args": { + "External id": 982484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946467.210, "dur": 5.247, + "args": { + "External id": 982485,"Record function id": 0, "Ev Idx": 5076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946468.881, "dur": 3.027, + "args": { + "External id": 982486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946470.174, "dur": 1.269, + "args": { + "External id": 982487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946470.581, "dur": 0.785, + "args": { + "External id": 982488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946476.169, "dur": 4.096, + "args": { + "External id": 982489,"Record function id": 0, "Ev Idx": 5080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946477.532, "dur": 2.270, + "args": { + "External id": 982490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946478.128, "dur": 1.219, + "args": { + "External id": 982491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946478.588, "dur": 0.681, + "args": { + "External id": 982492,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946484.704, "dur": 6.564, + "args": { + "External id": 982493,"Record function id": 0, "Ev Idx": 5084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345938946486.076, "dur": 4.654, + "args": { + "External id": 982494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946486.671, "dur": 3.600, + "args": { + "External id": 982495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345938946489.510, "dur": 0.682, + "args": { + "External id": 982496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938946496.545, "dur": 59963.794, + "args": { + "External id": 982497,"Record function id": 0, "Sequence number": 10552251, "Fwd thread id": 1, "Ev Idx": 5088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345938946498.276, "dur": 59951.250, + "args": { + "External id": 982498,"Sequence number": 10552251, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5089 + } + }, + { + "ph": "f", "id": 215, "pid": 2338708, "tid": 2379421, "ts": 6345938946498.276, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2338708, "tid": 2379421, + "ts": 6345938946531.082, "dur": 45.428, + "args": { + "External id": 982499,"Record function id": 0, "Ev Idx": 5090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2338708, "tid": 2379421, + "ts": 6345938946586.391, "dur": 72.504, + "args": { + "External id": 982500,"Record function id": 0, "Ev Idx": 5091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2338708, "tid": 2379421, + "ts": 6345938946666.423, "dur": 59773.265, + "args": { + "External id": 982501,"Record function id": 0, "Ev Idx": 5092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938946768.742, "dur": 7.973, + "args": { + "External id": 982502,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345938946790.754, "dur": 7.057, + "args": { + "External id": 982503,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938946812.927, "dur": 58589.547, + "args": { + "External id": 982504,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345938946828.445, "dur": 58559.380, + "args": { + "External id": 982505,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345938946928.839, "dur": 19.058, + "args": { + "External id": 982506,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345938946971.244, "dur": 58365.087, + "args": { + "External id": 982507,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345938946974.214, "dur": 58360.984, + "args": { + "External id": 982508,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345938946979.648, "dur": 12.339, + "args": { + "External id": 982509,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345938946994.226, "dur": 58335.393, + "args": { + "External id": 982510,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939005524.748, "dur": 12.962, + "args": { + "External id": 982511,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939005528.546, "dur": 8.700, + "args": { + "External id": 982512,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345939005570.334, "dur": 517.412, + "args": { + "External id": 982513,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939005607.311, "dur": 439.851, + "args": { + "External id": 982514,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5105, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345939005622.209, "dur": 416.531, + "args": { + "External id": 982515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939006120.502, "dur": 3.871, + "args": { + "External id": 982516,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5107, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939006203.326, "dur": 8.149, + "args": { + "External id": 982517,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939006261.311, "dur": 1.820, + "args": { + "External id": 982518,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939006280.877, "dur": 3.722, + "args": { + "External id": 982519,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939006298.101, "dur": 1.130, + "args": { + "External id": 982520,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939006312.557, "dur": 0.889, + "args": { + "External id": 982521,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939006324.608, "dur": 1.151, + "args": { + "External id": 982522,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939006339.686, "dur": 3.371, + "args": { + "External id": 982523,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939006354.375, "dur": 2.589, + "args": { + "External id": 982524,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939006367.824, "dur": 1.177, + "args": { + "External id": 982525,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939006478.330, "dur": 3362.582, + "args": { + "External id": 982526,"Record function id": 0, "Ev Idx": 5117 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2338708, "tid": 2379421, + "ts": 6345939006500.993, "dur": 1268.442, + "args": { + "External id": 982527,"Record function id": 0, "Ev Idx": 5118 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338708, "tid": 2379421, + "ts": 6345939006518.289, "dur": 406.876, + "args": { + "External id": 982528,"Record function id": 0, "Ev Idx": 5119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939006649.039, "dur": 4.760, + "args": { + "External id": 982529,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939006658.177, "dur": 0.792, + "args": { + "External id": 982530,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939006661.438, "dur": 2.869, + "args": { + "External id": 982531,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939006668.343, "dur": 0.662, + "args": { + "External id": 982532,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939006670.545, "dur": 0.734, + "args": { + "External id": 982533,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939006673.065, "dur": 0.562, + "args": { + "External id": 982534,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939006675.515, "dur": 2.141, + "args": { + "External id": 982535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939006681.676, "dur": 0.590, + "args": { + "External id": 982536,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939006683.925, "dur": 0.585, + "args": { + "External id": 982537,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939006686.216, "dur": 0.681, + "args": { + "External id": 982538,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939006708.083, "dur": 180.723, + "args": { + "External id": 982539,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939006726.726, "dur": 156.760, + "args": { + "External id": 982540,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939006747.939, "dur": 19.563, + "args": { + "External id": 982541,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345939006771.731, "dur": 80.242, + "args": { + "External id": 982542,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939006774.586, "dur": 76.912, + "args": { + "External id": 982543,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939006779.142, "dur": 6.617, + "args": { + "External id": 982544,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939006787.758, "dur": 63.014, + "args": { + "External id": 982545,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5136 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2338708, "tid": 2379421, + "ts": 6345939007040.846, "dur": 719.507, + "args": { + "External id": 982546,"Record function id": 0, "Ev Idx": 5137 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338708, "tid": 2379421, + "ts": 6345939007103.493, "dur": 642.856, + "args": { + "External id": 982547,"Record function id": 0, "Ev Idx": 5138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939007175.447, "dur": 8.079, + "args": { + "External id": 982548,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345939007201.968, "dur": 38.495, + "args": { + "External id": 982549,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007207.551, "dur": 2.001, + "args": { + "External id": 982550,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007211.964, "dur": 2.175, + "args": { + "External id": 982551,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007215.759, "dur": 0.616, + "args": { + "External id": 982552,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007218.014, "dur": 0.362, + "args": { + "External id": 982553,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007221.571, "dur": 0.460, + "args": { + "External id": 982554,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007223.968, "dur": 2.638, + "args": { + "External id": 982555,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007228.361, "dur": 0.440, + "args": { + "External id": 982556,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007232.508, "dur": 0.377, + "args": { + "External id": 982557,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007234.584, "dur": 0.359, + "args": { + "External id": 982558,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939007252.751, "dur": 56.940, + "args": { + "External id": 982559,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345939007345.040, "dur": 130.693, + "args": { + "External id": 982560,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939007356.908, "dur": 4.336, + "args": { + "External id": 982561,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345939007367.161, "dur": 11.357, + "args": { + "External id": 982562,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345939007371.981, "dur": 6.026, + "args": { + "External id": 982563,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007375.815, "dur": 0.779, + "args": { + "External id": 982564,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345939007386.571, "dur": 29.094, + "args": { + "External id": 982565,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007389.102, "dur": 0.419, + "args": { + "External id": 982566,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007392.308, "dur": 0.332, + "args": { + "External id": 982567,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007394.164, "dur": 2.676, + "args": { + "External id": 982568,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007398.480, "dur": 1.662, + "args": { + "External id": 982569,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007402.183, "dur": 0.425, + "args": { + "External id": 982570,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007404.205, "dur": 0.335, + "args": { + "External id": 982571,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007407.819, "dur": 0.292, + "args": { + "External id": 982572,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007409.422, "dur": 0.289, + "args": { + "External id": 982573,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939007411.564, "dur": 0.580, + "args": { + "External id": 982574,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939007431.790, "dur": 34.977, + "args": { + "External id": 982575,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345939007526.441, "dur": 140.168, + "args": { + "External id": 982576,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939007562.195, "dur": 99.905, + "args": { + "External id": 982577,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5168, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345939007574.390, "dur": 82.644, + "args": { + "External id": 982578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939007686.793, "dur": 2.343, + "args": { + "External id": 982579,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5170, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939007778.177, "dur": 2040.440, + "args": { + "External id": 982580,"Sequence number": 10552250, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5171 + } + }, + { + "ph": "f", "id": 216, "pid": 2338708, "tid": 2379421, "ts": 6345939007778.177, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939007904.476, "dur": 141.363, + "args": { + "External id": 982581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345939008142.919, "dur": 47.128, + "args": { + "External id": 982582,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345939008211.315, "dur": 64.780, + "args": { + "External id": 982583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939008290.549, "dur": 35.694, + "args": { + "External id": 982584,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939008333.685, "dur": 37.197, + "args": { + "External id": 982585,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939008378.427, "dur": 31.699, + "args": { + "External id": 982586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939008419.146, "dur": 33.706, + "args": { + "External id": 982587,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345939008484.647, "dur": 27.714, + "args": { + "External id": 982588,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345939008533.818, "dur": 33.793, + "args": { + "External id": 982589,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939008595.757, "dur": 22.007, + "args": { + "External id": 982590,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345939008634.001, "dur": 18.170, + "args": { + "External id": 982591,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939008669.261, "dur": 40.091, + "args": { + "External id": 982592,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939008714.338, "dur": 35.625, + "args": { + "External id": 982593,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345939008785.031, "dur": 377.722, + "args": { + "External id": 982594,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939008876.260, "dur": 7.067, + "args": { + "External id": 982595,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939008885.985, "dur": 3.404, + "args": { + "External id": 982596,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939008890.962, "dur": 3.683, + "args": { + "External id": 982597,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939008895.945, "dur": 2.096, + "args": { + "External id": 982598,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345939008957.266, "dur": 6.383, + "args": { + "External id": 982599,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939008959.307, "dur": 3.753, + "args": { + "External id": 982600,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345939008965.838, "dur": 60.976, + "args": { + "External id": 982601,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939008972.882, "dur": 5.615, + "args": { + "External id": 982602,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345939009030.757, "dur": 4.641, + "args": { + "External id": 982603,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939009034.090, "dur": 1.222, + "args": { + "External id": 982604,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345939009036.866, "dur": 59.204, + "args": { + "External id": 982605,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939009041.564, "dur": 0.983, + "args": { + "External id": 982606,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345939009213.516, "dur": 33.053, + "args": { + "External id": 982607,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939009268.482, "dur": 18.785, + "args": { + "External id": 982608,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939009296.817, "dur": 55.687, + "args": { + "External id": 982609,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939009359.707, "dur": 45.650, + "args": { + "External id": 982610,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939009416.744, "dur": 24.467, + "args": { + "External id": 982611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939009447.932, "dur": 36.445, + "args": { + "External id": 982612,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939009492.461, "dur": 32.902, + "args": { + "External id": 982613,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939009532.842, "dur": 39.862, + "args": { + "External id": 982614,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345939009596.568, "dur": 26.540, + "args": { + "External id": 982615,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345939009643.375, "dur": 27.544, + "args": { + "External id": 982616,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939009690.316, "dur": 22.259, + "args": { + "External id": 982617,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345939009731.788, "dur": 17.842, + "args": { + "External id": 982618,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345939009767.775, "dur": 20.151, + "args": { + "External id": 982619,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009866.933, "dur": 17.138, + "args": { + "External id": 982620,"Record function id": 0, "Ev Idx": 5211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009871.013, "dur": 12.020, + "args": { + "External id": 982621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009875.954, "dur": 6.200, + "args": { + "External id": 982622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009877.722, "dur": 4.323, + "args": { + "External id": 982623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009888.803, "dur": 5.927, + "args": { + "External id": 982624,"Record function id": 0, "Ev Idx": 5215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009890.805, "dur": 3.381, + "args": { + "External id": 982625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009891.826, "dur": 1.879, + "args": { + "External id": 982626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009892.575, "dur": 0.987, + "args": { + "External id": 982627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009898.766, "dur": 8.492, + "args": { + "External id": 982628,"Record function id": 0, "Ev Idx": 5219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009900.383, "dur": 6.375, + "args": { + "External id": 982629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009901.599, "dur": 4.669, + "args": { + "External id": 982630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009902.586, "dur": 3.544, + "args": { + "External id": 982631,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009911.028, "dur": 4.676, + "args": { + "External id": 982632,"Record function id": 0, "Ev Idx": 5223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009912.762, "dur": 2.466, + "args": { + "External id": 982633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009913.388, "dur": 1.390, + "args": { + "External id": 982634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009913.928, "dur": 0.769, + "args": { + "External id": 982635,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009919.370, "dur": 4.195, + "args": { + "External id": 982636,"Record function id": 0, "Ev Idx": 5227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009920.589, "dur": 2.511, + "args": { + "External id": 982637,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009921.245, "dur": 1.372, + "args": { + "External id": 982638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009921.786, "dur": 0.742, + "args": { + "External id": 982639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009927.438, "dur": 5.025, + "args": { + "External id": 982640,"Record function id": 0, "Ev Idx": 5231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009928.804, "dur": 3.173, + "args": { + "External id": 982641,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009929.368, "dur": 2.167, + "args": { + "External id": 982642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009930.156, "dur": 1.290, + "args": { + "External id": 982643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009936.221, "dur": 4.369, + "args": { + "External id": 982644,"Record function id": 0, "Ev Idx": 5235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009937.610, "dur": 2.505, + "args": { + "External id": 982645,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009938.192, "dur": 1.449, + "args": { + "External id": 982646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009938.912, "dur": 0.637, + "args": { + "External id": 982647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009944.237, "dur": 4.269, + "args": { + "External id": 982648,"Record function id": 0, "Ev Idx": 5239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009945.759, "dur": 2.289, + "args": { + "External id": 982649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009946.488, "dur": 1.109, + "args": { + "External id": 982650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009946.805, "dur": 0.704, + "args": { + "External id": 982651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009952.624, "dur": 5.763, + "args": { + "External id": 982652,"Record function id": 0, "Ev Idx": 5243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939009953.866, "dur": 4.055, + "args": { + "External id": 982653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009954.448, "dur": 2.995, + "args": { + "External id": 982654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939009956.623, "dur": 0.729, + "args": { + "External id": 982655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939009963.594, "dur": 60508.954, + "args": { + "External id": 982656,"Record function id": 0, "Sequence number": 10552249, "Fwd thread id": 1, "Ev Idx": 5247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939009965.479, "dur": 60496.945, + "args": { + "External id": 982657,"Sequence number": 10552249, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5248 + } + }, + { + "ph": "f", "id": 217, "pid": 2338708, "tid": 2379421, "ts": 6345939009965.479, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2338708, "tid": 2379421, + "ts": 6345939010001.643, "dur": 107.425, + "args": { + "External id": 982658,"Record function id": 0, "Ev Idx": 5249 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2338708, "tid": 2379421, + "ts": 6345939010120.503, "dur": 73.000, + "args": { + "External id": 982659,"Record function id": 0, "Ev Idx": 5250 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2338708, "tid": 2379421, + "ts": 6345939010201.280, "dur": 60252.296, + "args": { + "External id": 982660,"Record function id": 0, "Ev Idx": 5251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939010303.650, "dur": 8.245, + "args": { + "External id": 982661,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939010326.730, "dur": 7.833, + "args": { + "External id": 982662,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345939010350.806, "dur": 59132.394, + "args": { + "External id": 982663,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345939010366.462, "dur": 59102.646, + "args": { + "External id": 982664,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939010466.917, "dur": 19.020, + "args": { + "External id": 982665,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345939010508.455, "dur": 58904.947, + "args": { + "External id": 982666,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939010511.895, "dur": 58900.406, + "args": { + "External id": 982667,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939010517.220, "dur": 11.157, + "args": { + "External id": 982668,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939010530.622, "dur": 58875.766, + "args": { + "External id": 982669,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939069602.118, "dur": 13.674, + "args": { + "External id": 982670,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939069606.059, "dur": 9.284, + "args": { + "External id": 982671,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345939069647.839, "dur": 465.241, + "args": { + "External id": 982672,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939069685.650, "dur": 421.026, + "args": { + "External id": 982673,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5264, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345939069699.087, "dur": 400.106, + "args": { + "External id": 982674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939070141.598, "dur": 3.609, + "args": { + "External id": 982675,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5266, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939070220.077, "dur": 7.871, + "args": { + "External id": 982676,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939070280.047, "dur": 1.761, + "args": { + "External id": 982677,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939070299.712, "dur": 3.702, + "args": { + "External id": 982678,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939070316.010, "dur": 0.977, + "args": { + "External id": 982679,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939070328.847, "dur": 1.007, + "args": { + "External id": 982680,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939070341.619, "dur": 0.909, + "args": { + "External id": 982681,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939070356.547, "dur": 3.170, + "args": { + "External id": 982682,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939070370.766, "dur": 2.219, + "args": { + "External id": 982683,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939070383.954, "dur": 1.048, + "args": { + "External id": 982684,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939070488.989, "dur": 3303.722, + "args": { + "External id": 982685,"Record function id": 0, "Ev Idx": 5276 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2338708, "tid": 2379421, + "ts": 6345939070509.860, "dur": 1197.533, + "args": { + "External id": 982686,"Record function id": 0, "Ev Idx": 5277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338708, "tid": 2379421, + "ts": 6345939070526.927, "dur": 361.594, + "args": { + "External id": 982687,"Record function id": 0, "Ev Idx": 5278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939070618.938, "dur": 5.113, + "args": { + "External id": 982688,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939070627.908, "dur": 1.160, + "args": { + "External id": 982689,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939070631.241, "dur": 3.640, + "args": { + "External id": 982690,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939070637.061, "dur": 0.719, + "args": { + "External id": 982691,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939070641.827, "dur": 1.330, + "args": { + "External id": 982692,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939070644.804, "dur": 1.090, + "args": { + "External id": 982693,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939070647.795, "dur": 2.043, + "args": { + "External id": 982694,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939070651.491, "dur": 0.707, + "args": { + "External id": 982695,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939070656.167, "dur": 0.725, + "args": { + "External id": 982696,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939070658.572, "dur": 0.809, + "args": { + "External id": 982697,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939070679.806, "dur": 175.848, + "args": { + "External id": 982698,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939070697.042, "dur": 152.911, + "args": { + "External id": 982699,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939070716.941, "dur": 17.434, + "args": { + "External id": 982700,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345939070738.429, "dur": 78.356, + "args": { + "External id": 982701,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939070743.876, "dur": 72.536, + "args": { + "External id": 982702,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939070748.290, "dur": 5.992, + "args": { + "External id": 982703,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939070756.272, "dur": 59.110, + "args": { + "External id": 982704,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5295 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2338708, "tid": 2379421, + "ts": 6345939070972.165, "dur": 725.425, + "args": { + "External id": 982705,"Record function id": 0, "Ev Idx": 5296 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338708, "tid": 2379421, + "ts": 6345939070989.814, "dur": 693.394, + "args": { + "External id": 982706,"Record function id": 0, "Ev Idx": 5297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939071113.275, "dur": 7.738, + "args": { + "External id": 982707,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345939071140.289, "dur": 43.139, + "args": { + "External id": 982708,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071146.641, "dur": 3.588, + "args": { + "External id": 982709,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071157.595, "dur": 0.753, + "args": { + "External id": 982710,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071159.879, "dur": 0.390, + "args": { + "External id": 982711,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071163.084, "dur": 0.885, + "args": { + "External id": 982712,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071165.435, "dur": 0.303, + "args": { + "External id": 982713,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071167.187, "dur": 3.800, + "args": { + "External id": 982714,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071172.492, "dur": 0.321, + "args": { + "External id": 982715,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071174.509, "dur": 0.261, + "args": { + "External id": 982716,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071178.096, "dur": 0.389, + "args": { + "External id": 982717,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939071195.724, "dur": 52.528, + "args": { + "External id": 982718,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345939071284.233, "dur": 130.502, + "args": { + "External id": 982719,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939071296.047, "dur": 4.076, + "args": { + "External id": 982720,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345939071305.978, "dur": 11.895, + "args": { + "External id": 982721,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345939071310.955, "dur": 6.453, + "args": { + "External id": 982722,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071315.099, "dur": 0.833, + "args": { + "External id": 982723,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345939071325.779, "dur": 31.078, + "args": { + "External id": 982724,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071328.526, "dur": 0.535, + "args": { + "External id": 982725,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071330.998, "dur": 0.392, + "args": { + "External id": 982726,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071335.486, "dur": 2.546, + "args": { + "External id": 982727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071339.688, "dur": 0.490, + "args": { + "External id": 982728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071341.804, "dur": 0.520, + "args": { + "External id": 982729,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071345.476, "dur": 0.660, + "args": { + "External id": 982730,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071347.972, "dur": 0.591, + "args": { + "External id": 982731,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071349.810, "dur": 1.647, + "args": { + "External id": 982732,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939071353.401, "dur": 0.667, + "args": { + "External id": 982733,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939071371.067, "dur": 34.620, + "args": { + "External id": 982734,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345939071467.637, "dur": 137.060, + "args": { + "External id": 982735,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939071502.715, "dur": 97.989, + "args": { + "External id": 982736,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5327, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345939071513.798, "dur": 82.046, + "args": { + "External id": 982737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939071624.622, "dur": 2.441, + "args": { + "External id": 982738,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5329, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939071716.595, "dur": 2053.222, + "args": { + "External id": 982739,"Sequence number": 10552248, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5330 + } + }, + { + "ph": "f", "id": 218, "pid": 2338708, "tid": 2379421, "ts": 6345939071716.595, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939071842.143, "dur": 118.425, + "args": { + "External id": 982740,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345939072031.117, "dur": 87.878, + "args": { + "External id": 982741,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345939072144.367, "dur": 66.845, + "args": { + "External id": 982742,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939072226.167, "dur": 38.401, + "args": { + "External id": 982743,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939072272.336, "dur": 37.721, + "args": { + "External id": 982744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939072317.064, "dur": 31.882, + "args": { + "External id": 982745,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939072357.605, "dur": 33.442, + "args": { + "External id": 982746,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345939072425.093, "dur": 29.963, + "args": { + "External id": 982747,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345939072476.849, "dur": 32.788, + "args": { + "External id": 982748,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939072539.945, "dur": 22.588, + "args": { + "External id": 982749,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345939072577.060, "dur": 18.000, + "args": { + "External id": 982750,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939072604.977, "dur": 44.480, + "args": { + "External id": 982751,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939072654.321, "dur": 38.186, + "args": { + "External id": 982752,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345939072737.978, "dur": 312.939, + "args": { + "External id": 982753,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939072829.033, "dur": 6.935, + "args": { + "External id": 982754,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939072838.533, "dur": 5.135, + "args": { + "External id": 982755,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939072847.329, "dur": 3.206, + "args": { + "External id": 982756,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939072852.293, "dur": 2.489, + "args": { + "External id": 982757,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345939072904.746, "dur": 5.317, + "args": { + "External id": 982758,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939072906.849, "dur": 3.037, + "args": { + "External id": 982759,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345939072912.546, "dur": 38.848, + "args": { + "External id": 982760,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939072918.978, "dur": 3.950, + "args": { + "External id": 982761,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345939072953.156, "dur": 2.102, + "args": { + "External id": 982762,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939072954.313, "dur": 0.849, + "args": { + "External id": 982763,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345939072956.765, "dur": 21.083, + "args": { + "External id": 982764,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939072961.299, "dur": 2.274, + "args": { + "External id": 982765,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345939073141.929, "dur": 46.429, + "args": { + "External id": 982766,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939073222.443, "dur": 20.591, + "args": { + "External id": 982767,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939073254.501, "dur": 60.501, + "args": { + "External id": 982768,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939073324.218, "dur": 48.367, + "args": { + "External id": 982769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939073383.918, "dur": 23.606, + "args": { + "External id": 982770,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939073413.612, "dur": 34.191, + "args": { + "External id": 982771,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939073455.924, "dur": 31.297, + "args": { + "External id": 982772,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939073496.170, "dur": 33.056, + "args": { + "External id": 982773,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345939073552.781, "dur": 24.591, + "args": { + "External id": 982774,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345939073598.708, "dur": 26.276, + "args": { + "External id": 982775,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939073643.101, "dur": 18.315, + "args": { + "External id": 982776,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345939073684.202, "dur": 17.126, + "args": { + "External id": 982777,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345939073717.103, "dur": 18.960, + "args": { + "External id": 982778,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073818.362, "dur": 17.842, + "args": { + "External id": 982779,"Record function id": 0, "Ev Idx": 5370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073822.515, "dur": 12.708, + "args": { + "External id": 982780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073827.401, "dur": 6.639, + "args": { + "External id": 982781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073829.331, "dur": 4.594, + "args": { + "External id": 982782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073841.097, "dur": 5.666, + "args": { + "External id": 982783,"Record function id": 0, "Ev Idx": 5374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073843.057, "dur": 3.187, + "args": { + "External id": 982784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073844.086, "dur": 1.636, + "args": { + "External id": 982785,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073844.736, "dur": 0.881, + "args": { + "External id": 982786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073851.011, "dur": 8.368, + "args": { + "External id": 982787,"Record function id": 0, "Ev Idx": 5378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073852.873, "dur": 6.006, + "args": { + "External id": 982788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073853.671, "dur": 4.692, + "args": { + "External id": 982789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073854.723, "dur": 3.555, + "args": { + "External id": 982790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073863.352, "dur": 4.550, + "args": { + "External id": 982791,"Record function id": 0, "Ev Idx": 5382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073864.910, "dur": 2.430, + "args": { + "External id": 982792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073865.438, "dur": 1.430, + "args": { + "External id": 982793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073865.939, "dur": 0.852, + "args": { + "External id": 982794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073871.739, "dur": 4.248, + "args": { + "External id": 982795,"Record function id": 0, "Ev Idx": 5386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073873.146, "dur": 2.339, + "args": { + "External id": 982796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073873.650, "dur": 1.326, + "args": { + "External id": 982797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073873.983, "dur": 0.903, + "args": { + "External id": 982798,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073879.800, "dur": 4.286, + "args": { + "External id": 982799,"Record function id": 0, "Ev Idx": 5390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073881.175, "dur": 2.417, + "args": { + "External id": 982800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073881.723, "dur": 1.355, + "args": { + "External id": 982801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073882.243, "dur": 0.748, + "args": { + "External id": 982802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073887.999, "dur": 6.629, + "args": { + "External id": 982803,"Record function id": 0, "Ev Idx": 5394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073889.678, "dur": 4.463, + "args": { + "External id": 982804,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073890.211, "dur": 3.356, + "args": { + "External id": 982805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073892.498, "dur": 0.979, + "args": { + "External id": 982806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073898.458, "dur": 4.854, + "args": { + "External id": 982807,"Record function id": 0, "Ev Idx": 5398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073900.283, "dur": 2.539, + "args": { + "External id": 982808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073900.810, "dur": 1.503, + "args": { + "External id": 982809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073901.331, "dur": 0.893, + "args": { + "External id": 982810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073907.009, "dur": 5.410, + "args": { + "External id": 982811,"Record function id": 0, "Ev Idx": 5402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939073908.501, "dur": 3.410, + "args": { + "External id": 982812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073909.595, "dur": 1.832, + "args": { + "External id": 982813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939073910.390, "dur": 0.948, + "args": { + "External id": 982814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939073917.724, "dur": 63818.524, + "args": { + "External id": 982815,"Record function id": 0, "Sequence number": 10552247, "Fwd thread id": 1, "Ev Idx": 5406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939073920.060, "dur": 63806.875, + "args": { + "External id": 982816,"Sequence number": 10552247, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5407 + } + }, + { + "ph": "f", "id": 219, "pid": 2338708, "tid": 2379421, "ts": 6345939073920.060, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2338708, "tid": 2379421, + "ts": 6345939073953.936, "dur": 45.431, + "args": { + "External id": 982817,"Record function id": 0, "Ev Idx": 5408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2338708, "tid": 2379421, + "ts": 6345939074031.535, "dur": 125.520, + "args": { + "External id": 982818,"Record function id": 0, "Ev Idx": 5409 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2338708, "tid": 2379421, + "ts": 6345939074167.700, "dur": 63549.740, + "args": { + "External id": 982819,"Record function id": 0, "Ev Idx": 5410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939074276.102, "dur": 8.669, + "args": { + "External id": 982820,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939074297.001, "dur": 8.163, + "args": { + "External id": 982821,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345939074322.273, "dur": 62426.304, + "args": { + "External id": 982822,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345939074339.413, "dur": 62394.684, + "args": { + "External id": 982823,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939074437.763, "dur": 19.909, + "args": { + "External id": 982824,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345939074480.244, "dur": 62202.592, + "args": { + "External id": 982825,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939074487.707, "dur": 62194.047, + "args": { + "External id": 982826,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939074492.698, "dur": 10.949, + "args": { + "External id": 982827,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939074505.621, "dur": 62170.263, + "args": { + "External id": 982828,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939136864.830, "dur": 13.161, + "args": { + "External id": 982829,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939136868.708, "dur": 8.709, + "args": { + "External id": 982830,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345939136913.737, "dur": 461.449, + "args": { + "External id": 982831,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939136949.916, "dur": 418.810, + "args": { + "External id": 982832,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5423, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345939136962.971, "dur": 398.734, + "args": { + "External id": 982833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939137405.146, "dur": 3.073, + "args": { + "External id": 982834,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5425, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939137481.743, "dur": 8.076, + "args": { + "External id": 982835,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939137539.419, "dur": 1.812, + "args": { + "External id": 982836,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939137558.241, "dur": 3.785, + "args": { + "External id": 982837,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939137575.038, "dur": 1.061, + "args": { + "External id": 982838,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939137590.948, "dur": 0.809, + "args": { + "External id": 982839,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939137602.613, "dur": 0.912, + "args": { + "External id": 982840,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939137614.918, "dur": 3.421, + "args": { + "External id": 982841,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939137631.194, "dur": 2.488, + "args": { + "External id": 982842,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939137644.303, "dur": 0.860, + "args": { + "External id": 982843,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939137753.048, "dur": 3409.689, + "args": { + "External id": 982844,"Record function id": 0, "Ev Idx": 5435 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2338708, "tid": 2379421, + "ts": 6345939137774.996, "dur": 1269.928, + "args": { + "External id": 982845,"Record function id": 0, "Ev Idx": 5436 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338708, "tid": 2379421, + "ts": 6345939137793.542, "dur": 460.168, + "args": { + "External id": 982846,"Record function id": 0, "Ev Idx": 5437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939137886.847, "dur": 4.578, + "args": { + "External id": 982847,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939137895.249, "dur": 0.985, + "args": { + "External id": 982848,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939137900.613, "dur": 3.122, + "args": { + "External id": 982849,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939137905.483, "dur": 0.962, + "args": { + "External id": 982850,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939137908.150, "dur": 0.849, + "args": { + "External id": 982851,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939137910.657, "dur": 0.850, + "args": { + "External id": 982852,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939137915.700, "dur": 1.140, + "args": { + "External id": 982853,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939137918.573, "dur": 0.770, + "args": { + "External id": 982854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939137921.186, "dur": 0.848, + "args": { + "External id": 982855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939137923.803, "dur": 0.536, + "args": { + "External id": 982856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939137947.198, "dur": 264.225, + "args": { + "External id": 982857,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939137975.121, "dur": 229.358, + "args": { + "External id": 982858,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939137994.711, "dur": 42.956, + "args": { + "External id": 982859,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345939138043.063, "dur": 125.444, + "args": { + "External id": 982860,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939138045.921, "dur": 121.952, + "args": { + "External id": 982861,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138050.861, "dur": 47.146, + "args": { + "External id": 982862,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939138101.107, "dur": 66.051, + "args": { + "External id": 982863,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5454 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2338708, "tid": 2379421, + "ts": 6345939138351.774, "dur": 678.070, + "args": { + "External id": 982864,"Record function id": 0, "Ev Idx": 5455 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338708, "tid": 2379421, + "ts": 6345939138373.282, "dur": 620.881, + "args": { + "External id": 982865,"Record function id": 0, "Ev Idx": 5456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939138443.380, "dur": 5.626, + "args": { + "External id": 982866,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345939138467.229, "dur": 36.333, + "args": { + "External id": 982867,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138473.282, "dur": 2.202, + "args": { + "External id": 982868,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138477.969, "dur": 2.435, + "args": { + "External id": 982869,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138482.319, "dur": 0.723, + "args": { + "External id": 982870,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138484.725, "dur": 0.402, + "args": { + "External id": 982871,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138488.369, "dur": 0.418, + "args": { + "External id": 982872,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138490.157, "dur": 2.955, + "args": { + "External id": 982873,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138494.751, "dur": 0.289, + "args": { + "External id": 982874,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138497.988, "dur": 0.270, + "args": { + "External id": 982875,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138499.723, "dur": 0.346, + "args": { + "External id": 982876,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939138516.145, "dur": 48.566, + "args": { + "External id": 982877,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345939138599.765, "dur": 132.364, + "args": { + "External id": 982878,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 5469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939138612.076, "dur": 4.059, + "args": { + "External id": 982879,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345939138622.111, "dur": 12.325, + "args": { + "External id": 982880,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345939138627.058, "dur": 6.934, + "args": { + "External id": 982881,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 5472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138631.599, "dur": 0.757, + "args": { + "External id": 982882,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 5473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345939138642.998, "dur": 32.981, + "args": { + "External id": 982883,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 5474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138646.059, "dur": 0.574, + "args": { + "External id": 982884,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138649.601, "dur": 0.455, + "args": { + "External id": 982885,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138651.436, "dur": 3.045, + "args": { + "External id": 982886,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138656.436, "dur": 1.498, + "args": { + "External id": 982887,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138659.536, "dur": 0.667, + "args": { + "External id": 982888,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138661.481, "dur": 0.413, + "args": { + "External id": 982889,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138664.930, "dur": 0.350, + "args": { + "External id": 982890,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138666.910, "dur": 0.565, + "args": { + "External id": 982891,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939138668.759, "dur": 0.484, + "args": { + "External id": 982892,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939138689.552, "dur": 32.645, + "args": { + "External id": 982893,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 5484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345939138781.330, "dur": 135.247, + "args": { + "External id": 982894,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 5485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939138814.093, "dur": 98.186, + "args": { + "External id": 982895,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5486, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345939138824.386, "dur": 82.860, + "args": { + "External id": 982896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 5487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939138937.142, "dur": 2.146, + "args": { + "External id": 982897,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5488, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939139094.055, "dur": 2044.888, + "args": { + "External id": 982898,"Sequence number": 10552246, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5489 + } + }, + { + "ph": "f", "id": 220, "pid": 2338708, "tid": 2379421, "ts": 6345939139094.055, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939139228.578, "dur": 125.427, + "args": { + "External id": 982899,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345939139405.025, "dur": 45.753, + "args": { + "External id": 982900,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345939139470.268, "dur": 57.557, + "args": { + "External id": 982901,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939139541.624, "dur": 35.755, + "args": { + "External id": 982902,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939139584.909, "dur": 35.856, + "args": { + "External id": 982903,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939139628.352, "dur": 32.202, + "args": { + "External id": 982904,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939139668.852, "dur": 31.907, + "args": { + "External id": 982905,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345939139730.277, "dur": 26.117, + "args": { + "External id": 982906,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345939139779.659, "dur": 30.643, + "args": { + "External id": 982907,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939139836.479, "dur": 20.900, + "args": { + "External id": 982908,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345939139877.445, "dur": 15.801, + "args": { + "External id": 982909,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939139901.889, "dur": 40.968, + "args": { + "External id": 982910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939139947.066, "dur": 36.346, + "args": { + "External id": 982911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345939140047.998, "dur": 359.762, + "args": { + "External id": 982912,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939140183.232, "dur": 8.656, + "args": { + "External id": 982913,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939140194.758, "dur": 3.156, + "args": { + "External id": 982914,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939140199.678, "dur": 2.619, + "args": { + "External id": 982915,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939140203.774, "dur": 2.187, + "args": { + "External id": 982916,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345939140255.690, "dur": 7.617, + "args": { + "External id": 982917,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939140258.407, "dur": 4.653, + "args": { + "External id": 982918,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345939140265.595, "dur": 37.040, + "args": { + "External id": 982919,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939140272.761, "dur": 3.640, + "args": { + "External id": 982920,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345939140306.922, "dur": 1.951, + "args": { + "External id": 982921,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939140308.088, "dur": 0.673, + "args": { + "External id": 982922,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345939140310.192, "dur": 41.207, + "args": { + "External id": 982923,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939140326.824, "dur": 1.284, + "args": { + "External id": 982924,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345939140456.475, "dur": 32.300, + "args": { + "External id": 982925,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939140513.606, "dur": 17.993, + "args": { + "External id": 982926,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939140540.690, "dur": 64.463, + "args": { + "External id": 982927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939140613.356, "dur": 47.826, + "args": { + "External id": 982928,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939140676.679, "dur": 24.804, + "args": { + "External id": 982929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939140708.215, "dur": 38.887, + "args": { + "External id": 982930,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939140755.529, "dur": 31.769, + "args": { + "External id": 982931,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939140794.399, "dur": 33.105, + "args": { + "External id": 982932,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345939140851.805, "dur": 25.999, + "args": { + "External id": 982933,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345939140899.440, "dur": 26.083, + "args": { + "External id": 982934,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939140946.057, "dur": 18.620, + "args": { + "External id": 982935,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345939140984.655, "dur": 15.594, + "args": { + "External id": 982936,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345939141041.444, "dur": 59.090, + "args": { + "External id": 982937,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141189.440, "dur": 20.419, + "args": { + "External id": 982938,"Record function id": 0, "Ev Idx": 5529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141193.384, "dur": 15.280, + "args": { + "External id": 982939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141198.551, "dur": 9.004, + "args": { + "External id": 982940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141201.076, "dur": 6.325, + "args": { + "External id": 982941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141214.676, "dur": 6.196, + "args": { + "External id": 982942,"Record function id": 0, "Ev Idx": 5533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141217.024, "dur": 3.317, + "args": { + "External id": 982943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141218.012, "dur": 1.788, + "args": { + "External id": 982944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141218.769, "dur": 0.910, + "args": { + "External id": 982945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141224.651, "dur": 7.605, + "args": { + "External id": 982946,"Record function id": 0, "Ev Idx": 5537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141226.360, "dur": 5.421, + "args": { + "External id": 982947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141227.291, "dur": 3.966, + "args": { + "External id": 982948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141227.949, "dur": 3.231, + "args": { + "External id": 982949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141236.057, "dur": 4.780, + "args": { + "External id": 982950,"Record function id": 0, "Ev Idx": 5541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141237.621, "dur": 2.741, + "args": { + "External id": 982951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141238.263, "dur": 1.622, + "args": { + "External id": 982952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141238.602, "dur": 1.195, + "args": { + "External id": 982953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141244.752, "dur": 4.532, + "args": { + "External id": 982954,"Record function id": 0, "Ev Idx": 5545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141246.030, "dur": 2.765, + "args": { + "External id": 982955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141246.717, "dur": 1.594, + "args": { + "External id": 982956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141247.329, "dur": 0.895, + "args": { + "External id": 982957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141253.100, "dur": 7.940, + "args": { + "External id": 982958,"Record function id": 0, "Ev Idx": 5549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141254.942, "dur": 5.588, + "args": { + "External id": 982959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141255.700, "dur": 4.353, + "args": { + "External id": 982960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141259.164, "dur": 0.803, + "args": { + "External id": 982961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141264.849, "dur": 5.151, + "args": { + "External id": 982962,"Record function id": 0, "Ev Idx": 5553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141266.787, "dur": 2.750, + "args": { + "External id": 982963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141267.461, "dur": 1.568, + "args": { + "External id": 982964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141268.010, "dur": 0.934, + "args": { + "External id": 982965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141273.622, "dur": 4.642, + "args": { + "External id": 982966,"Record function id": 0, "Ev Idx": 5557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141274.992, "dur": 2.826, + "args": { + "External id": 982967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141275.521, "dur": 1.803, + "args": { + "External id": 982968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141276.038, "dur": 1.198, + "args": { + "External id": 982969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141282.264, "dur": 4.996, + "args": { + "External id": 982970,"Record function id": 0, "Ev Idx": 5561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939141284.040, "dur": 2.741, + "args": { + "External id": 982971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141284.891, "dur": 1.383, + "args": { + "External id": 982972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939141285.528, "dur": 0.658, + "args": { + "External id": 982973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939141293.061, "dur": 62484.358, + "args": { + "External id": 982974,"Record function id": 0, "Sequence number": 10552245, "Fwd thread id": 1, "Ev Idx": 5565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939141295.113, "dur": 62471.816, + "args": { + "External id": 982975,"Sequence number": 10552245, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5566 + } + }, + { + "ph": "f", "id": 221, "pid": 2338708, "tid": 2379421, "ts": 6345939141295.113, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2338708, "tid": 2379421, + "ts": 6345939141331.051, "dur": 43.699, + "args": { + "External id": 982976,"Record function id": 0, "Ev Idx": 5567 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2338708, "tid": 2379421, + "ts": 6345939141384.133, "dur": 78.806, + "args": { + "External id": 982977,"Record function id": 0, "Ev Idx": 5568 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2338708, "tid": 2379421, + "ts": 6345939141470.649, "dur": 62286.966, + "args": { + "External id": 982978,"Record function id": 0, "Ev Idx": 5569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939141573.637, "dur": 8.452, + "args": { + "External id": 982979,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939141593.360, "dur": 7.385, + "args": { + "External id": 982980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345939141617.185, "dur": 61132.638, + "args": { + "External id": 982981,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345939141633.055, "dur": 61101.196, + "args": { + "External id": 982982,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939141735.436, "dur": 18.762, + "args": { + "External id": 982983,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345939141783.350, "dur": 60898.874, + "args": { + "External id": 982984,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939141788.945, "dur": 60892.233, + "args": { + "External id": 982985,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939141794.123, "dur": 12.066, + "args": { + "External id": 982986,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939141808.517, "dur": 60866.915, + "args": { + "External id": 982987,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939202870.601, "dur": 14.314, + "args": { + "External id": 982988,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939202874.571, "dur": 9.795, + "args": { + "External id": 982989,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345939202920.068, "dur": 502.358, + "args": { + "External id": 982990,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939202957.172, "dur": 458.675, + "args": { + "External id": 982991,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5582, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345939202970.834, "dur": 437.821, + "args": { + "External id": 982992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939203452.997, "dur": 2.664, + "args": { + "External id": 982993,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5584, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939203532.214, "dur": 8.310, + "args": { + "External id": 982994,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939203589.275, "dur": 1.602, + "args": { + "External id": 982995,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939203607.546, "dur": 3.860, + "args": { + "External id": 982996,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939203624.336, "dur": 0.944, + "args": { + "External id": 982997,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939203637.827, "dur": 1.073, + "args": { + "External id": 982998,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939203650.523, "dur": 0.755, + "args": { + "External id": 982999,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939203664.878, "dur": 2.923, + "args": { + "External id": 983000,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939203679.286, "dur": 1.483, + "args": { + "External id": 983001,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939203690.622, "dur": 0.854, + "args": { + "External id": 983002,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939203794.002, "dur": 2700.170, + "args": { + "External id": 983003,"Record function id": 0, "Ev Idx": 5594 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2338708, "tid": 2379421, + "ts": 6345939203816.621, "dur": 579.864, + "args": { + "External id": 983004,"Record function id": 0, "Ev Idx": 5595 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338708, "tid": 2379421, + "ts": 6345939203834.165, "dur": 455.962, + "args": { + "External id": 983005,"Record function id": 0, "Ev Idx": 5596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939203932.921, "dur": 4.562, + "args": { + "External id": 983006,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939203941.573, "dur": 1.522, + "args": { + "External id": 983007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939203947.579, "dur": 3.676, + "args": { + "External id": 983008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939203953.352, "dur": 1.142, + "args": { + "External id": 983009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939203956.396, "dur": 1.010, + "args": { + "External id": 983010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939203958.869, "dur": 1.252, + "args": { + "External id": 983011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 5602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939203964.420, "dur": 1.226, + "args": { + "External id": 983012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 5603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939203967.426, "dur": 1.091, + "args": { + "External id": 983013,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939203970.154, "dur": 1.060, + "args": { + "External id": 983014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939203972.875, "dur": 1.103, + "args": { + "External id": 983015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 5606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939203995.974, "dur": 249.143, + "args": { + "External id": 983016,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939204035.177, "dur": 202.953, + "args": { + "External id": 983017,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 5608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939204098.482, "dur": 19.358, + "args": { + "External id": 983018,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345939204122.357, "dur": 80.434, + "args": { + "External id": 983019,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 5610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939204125.126, "dur": 77.164, + "args": { + "External id": 983020,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 5611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939204129.530, "dur": 8.502, + "args": { + "External id": 983021,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939204139.986, "dur": 61.522, + "args": { + "External id": 983022,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 5613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939204405.840, "dur": 2064.255, + "args": { + "External id": 983023,"Sequence number": 10552244, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5614 + } + }, + { + "ph": "f", "id": 222, "pid": 2338708, "tid": 2379421, "ts": 6345939204405.840, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939204530.030, "dur": 117.688, + "args": { + "External id": 983024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 5615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345939204698.293, "dur": 44.376, + "args": { + "External id": 983025,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 5616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345939204763.696, "dur": 54.983, + "args": { + "External id": 983026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 5617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939204829.720, "dur": 34.405, + "args": { + "External id": 983027,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939204873.070, "dur": 35.724, + "args": { + "External id": 983028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939204916.256, "dur": 30.825, + "args": { + "External id": 983029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 5620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939204957.253, "dur": 33.306, + "args": { + "External id": 983030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 5621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345939205044.343, "dur": 76.639, + "args": { + "External id": 983031,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 5622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345939205146.656, "dur": 34.010, + "args": { + "External id": 983032,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939205212.320, "dur": 25.066, + "args": { + "External id": 983033,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345939205253.394, "dur": 18.272, + "args": { + "External id": 983034,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939205283.586, "dur": 49.480, + "args": { + "External id": 983035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939205337.646, "dur": 39.019, + "args": { + "External id": 983036,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345939205411.015, "dur": 317.116, + "args": { + "External id": 983037,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 5628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939205504.296, "dur": 8.044, + "args": { + "External id": 983038,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939205514.697, "dur": 2.842, + "args": { + "External id": 983039,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939205519.121, "dur": 4.327, + "args": { + "External id": 983040,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939205552.447, "dur": 2.656, + "args": { + "External id": 983041,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345939205605.158, "dur": 6.900, + "args": { + "External id": 983042,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939205608.555, "dur": 3.294, + "args": { + "External id": 983043,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345939205614.161, "dur": 36.614, + "args": { + "External id": 983044,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939205620.758, "dur": 1.812, + "args": { + "External id": 983045,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345939205654.708, "dur": 1.761, + "args": { + "External id": 983046,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939205655.788, "dur": 0.593, + "args": { + "External id": 983047,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 5638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345939205657.413, "dur": 19.996, + "args": { + "External id": 983048,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 5639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939205661.516, "dur": 0.566, + "args": { + "External id": 983049,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 5640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345939205770.358, "dur": 31.248, + "args": { + "External id": 983050,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939205821.542, "dur": 20.233, + "args": { + "External id": 983051,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939205854.845, "dur": 44.265, + "args": { + "External id": 983052,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939205908.183, "dur": 40.865, + "args": { + "External id": 983053,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939205960.345, "dur": 23.432, + "args": { + "External id": 983054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 5645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939205990.467, "dur": 99.289, + "args": { + "External id": 983055,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 5646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939206103.597, "dur": 38.789, + "args": { + "External id": 983056,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 5647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345939206151.654, "dur": 34.684, + "args": { + "External id": 983057,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 5648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345939206216.650, "dur": 28.735, + "args": { + "External id": 983058,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 5649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345939206277.567, "dur": 37.289, + "args": { + "External id": 983059,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939206337.870, "dur": 19.753, + "args": { + "External id": 983060,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 5651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345939206379.832, "dur": 15.254, + "args": { + "External id": 983061,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 5652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345939206413.824, "dur": 17.756, + "args": { + "External id": 983062,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 5653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206520.096, "dur": 17.188, + "args": { + "External id": 983063,"Record function id": 0, "Ev Idx": 5654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206524.079, "dur": 12.057, + "args": { + "External id": 983064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206528.765, "dur": 6.221, + "args": { + "External id": 983065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206530.510, "dur": 4.374, + "args": { + "External id": 983066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206542.002, "dur": 7.875, + "args": { + "External id": 983067,"Record function id": 0, "Ev Idx": 5658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206543.915, "dur": 5.413, + "args": { + "External id": 983068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206544.631, "dur": 4.030, + "args": { + "External id": 983069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206545.219, "dur": 3.362, + "args": { + "External id": 983070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206553.817, "dur": 5.717, + "args": { + "External id": 983071,"Record function id": 0, "Ev Idx": 5662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206555.700, "dur": 3.320, + "args": { + "External id": 983072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206556.436, "dur": 2.014, + "args": { + "External id": 983073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206557.324, "dur": 0.982, + "args": { + "External id": 983074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 5665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206563.364, "dur": 5.482, + "args": { + "External id": 983075,"Record function id": 0, "Ev Idx": 5666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206565.020, "dur": 3.301, + "args": { + "External id": 983076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206566.141, "dur": 1.668, + "args": { + "External id": 983077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206566.919, "dur": 0.805, + "args": { + "External id": 983078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 5669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206572.610, "dur": 4.888, + "args": { + "External id": 983079,"Record function id": 0, "Ev Idx": 5670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206574.157, "dur": 2.800, + "args": { + "External id": 983080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206574.993, "dur": 1.423, + "args": { + "External id": 983081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206575.616, "dur": 0.720, + "args": { + "External id": 983082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206581.371, "dur": 6.766, + "args": { + "External id": 983083,"Record function id": 0, "Ev Idx": 5674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206582.792, "dur": 4.832, + "args": { + "External id": 983084,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206583.676, "dur": 3.410, + "args": { + "External id": 983085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206586.307, "dur": 0.699, + "args": { + "External id": 983086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 5677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206592.278, "dur": 4.724, + "args": { + "External id": 983087,"Record function id": 0, "Ev Idx": 5678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206593.866, "dur": 2.580, + "args": { + "External id": 983088,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206594.683, "dur": 1.251, + "args": { + "External id": 983089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206595.184, "dur": 0.660, + "args": { + "External id": 983090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206600.954, "dur": 4.993, + "args": { + "External id": 983091,"Record function id": 0, "Ev Idx": 5682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206602.942, "dur": 2.516, + "args": { + "External id": 983092,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206603.598, "dur": 1.384, + "args": { + "External id": 983093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206604.201, "dur": 0.707, + "args": { + "External id": 983094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 5685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206609.947, "dur": 6.863, + "args": { + "External id": 983095,"Record function id": 0, "Ev Idx": 5686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939206611.557, "dur": 4.753, + "args": { + "External id": 983096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206612.563, "dur": 3.231, + "args": { + "External id": 983097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939206613.042, "dur": 2.668, + "args": { + "External id": 983098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 5689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939206621.910, "dur": 63664.151, + "args": { + "External id": 983099,"Record function id": 0, "Sequence number": 10552243, "Fwd thread id": 1, "Ev Idx": 5690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939206623.598, "dur": 63651.967, + "args": { + "External id": 983100,"Sequence number": 10552243, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5691 + } + }, + { + "ph": "f", "id": 223, "pid": 2338708, "tid": 2379421, "ts": 6345939206623.598, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2338708, "tid": 2379421, + "ts": 6345939206660.176, "dur": 45.263, + "args": { + "External id": 983101,"Record function id": 0, "Ev Idx": 5692 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2338708, "tid": 2379421, + "ts": 6345939206714.607, "dur": 79.481, + "args": { + "External id": 983102,"Record function id": 0, "Ev Idx": 5693 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2338708, "tid": 2379421, + "ts": 6345939206800.979, "dur": 63464.614, + "args": { + "External id": 983103,"Record function id": 0, "Ev Idx": 5694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939206903.835, "dur": 8.382, + "args": { + "External id": 983104,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939206923.841, "dur": 5.053, + "args": { + "External id": 983105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 5696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345939206946.278, "dur": 62333.276, + "args": { + "External id": 983106,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345939206961.122, "dur": 62303.691, + "args": { + "External id": 983107,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 5698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939207135.201, "dur": 21.222, + "args": { + "External id": 983108,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345939207183.392, "dur": 62034.040, + "args": { + "External id": 983109,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 5700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939207191.279, "dur": 62025.126, + "args": { + "External id": 983110,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 5701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939207196.851, "dur": 12.908, + "args": { + "External id": 983111,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939207212.139, "dur": 61998.829, + "args": { + "External id": 983112,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 5703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939269399.836, "dur": 11.902, + "args": { + "External id": 983113,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 5704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939269403.418, "dur": 7.915, + "args": { + "External id": 983114,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345939269443.849, "dur": 410.302, + "args": { + "External id": 983115,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 5706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939269478.472, "dur": 369.697, + "args": { + "External id": 983116,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5707, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345939269493.200, "dur": 348.909, + "args": { + "External id": 983117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 5708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939269880.157, "dur": 2.456, + "args": { + "External id": 983118,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5709, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939269949.029, "dur": 7.554, + "args": { + "External id": 983119,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939270032.297, "dur": 5.490, + "args": { + "External id": 983120,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939270101.135, "dur": 2.601, + "args": { + "External id": 983121,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939270119.957, "dur": 0.981, + "args": { + "External id": 983122,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939270134.170, "dur": 0.928, + "args": { + "External id": 983123,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939270148.112, "dur": 2.975, + "args": { + "External id": 983124,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939270163.175, "dur": 0.848, + "args": { + "External id": 983125,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939270178.966, "dur": 0.946, + "args": { + "External id": 983126,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939270190.857, "dur": 0.727, + "args": { + "External id": 983127,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 5718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939270306.445, "dur": 325.561, + "args": { + "External id": 983128,"Record function id": 0, "Sequence number": 10552242, "Fwd thread id": 1, "Ev Idx": 5719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345939270309.692, "dur": 311.250, + "args": { + "External id": 983129,"Sequence number": 10552242, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5720 + } + }, + { + "ph": "f", "id": 224, "pid": 2338708, "tid": 2379421, "ts": 6345939270309.692, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2338708, "tid": 2379421, + "ts": 6345939270444.059, "dur": 58.834, + "args": { + "External id": 983130,"kernel_hash": "c5m7emojmcmpfnsytzs4n2vhybuspjxfkuji6biwd2ecull3vbnp", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/5m/c5m7emojmcmpfnsytzs4n2vhybuspjxfkuji6biwd2ecull3vbnp.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 5721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2338708, "tid": 2379421, + "ts": 6345939270522.415, "dur": 31.657, + "args": { + "External id": 983131,"kernel_hash": "c46xff3fh3ar7hq2aefm4fztaqpffb3u6n2xaouky6dh4l2633ed", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/46/c46xff3fh3ar7hq2aefm4fztaqpffb3u6n2xaouky6dh4l2633ed.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096, 4096], [32000, 4096], []], "Ev Idx": 5722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2338708, "tid": 2379421, + "ts": 6345939270577.571, "dur": 24.719, + "args": { + "External id": 983132,"kernel_hash": "cj4ssgwdjcekiff7t7cfceucpuq2k6lgzvcstcuozoccjjbnb5tv", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4ssgwdjcekiff7t7cfceucpuq2k6lgzvcstcuozoccjjbnb5tv.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939270643.801, "dur": 14.257, + "args": { + "External id": 983133,"Record function id": 0, "Ev Idx": 5724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345939270646.811, "dur": 10.237, + "args": { + "External id": 983134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939270650.126, "dur": 5.850, + "args": { + "External id": 983135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345939270651.485, "dur": 4.366, + "args": { + "External id": 983136,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 5727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2338708, "tid": 2379421, + "ts": 6345939270682.560, "dur": 19945.352, + "args": { + "External id": 983137,"Record function id": 0, "Ev Idx": 5728 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2338708, "tid": 2379421, + "ts": 6345939270705.016, "dur": 38.704, + "args": { + "External id": 983138,"Record function id": 0, "Ev Idx": 5729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2338708, "tid": 2379421, + "ts": 6345939270751.051, "dur": 231.120, + "args": { + "External id": 983139,"Record function id": 0, "Ev Idx": 5730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2338708, "tid": 2379421, + "ts": 6345939270988.976, "dur": 19359.029, + "args": { + "External id": 983140,"Record function id": 0, "Ev Idx": 5731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939271187.561, "dur": 10.511, + "args": { + "External id": 983141,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345939271212.862, "dur": 6.649, + "args": { + "External id": 983142,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 5733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345939271245.323, "dur": 17634.192, + "args": { + "External id": 983143,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 5734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345939271266.854, "dur": 17596.927, + "args": { + "External id": 983144,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 5735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939272095.078, "dur": 28.026, + "args": { + "External id": 983145,"Record function id": 0, "Concrete Inputs": ["[277237]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345939272355.347, "dur": 16447.600, + "args": { + "External id": 983146,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], [], []], "Ev Idx": 5737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345939272359.102, "dur": 16442.352, + "args": { + "External id": 983147,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], []], "Ev Idx": 5738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939272367.275, "dur": 19.952, + "args": { + "External id": 983148,"Record function id": 0, "Concrete Inputs": ["[277237]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345939272390.086, "dur": 16404.113, + "args": { + "External id": 983149,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[277237], [277237], []], "Ev Idx": 5740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939289096.035, "dur": 14.727, + "args": { + "External id": 983150,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1134596096], [], [], [], [], []], "Ev Idx": 5741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345939289100.297, "dur": 9.723, + "args": { + "External id": 983151,"Record function id": 0, "Concrete Inputs": ["[141824512]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345939289145.111, "dur": 406.868, + "args": { + "External id": 983152,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[141824512], [1134596096], [], [], [], []], "Ev Idx": 5743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939289178.666, "dur": 368.180, + "args": { + "External id": 983153,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 141824512, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1134596096], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5744, "In msg nelems": 1134596096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345939289193.525, "dur": 346.139, + "args": { + "External id": 983154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1134596096]], "Ev Idx": 5745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345939289575.710, "dur": 2.793, + "args": { + "External id": 983155,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5746, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289645.448, "dur": 7.646, + "args": { + "External id": 983156,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289702.338, "dur": 1.309, + "args": { + "External id": 983157,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289719.148, "dur": 1.389, + "args": { + "External id": 983158,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "16384512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289732.858, "dur": 0.867, + "args": { + "External id": 983159,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "18481664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289747.593, "dur": 0.869, + "args": { + "External id": 983160,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "19005952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289765.114, "dur": 0.890, + "args": { + "External id": 983161,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "19530240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289778.158, "dur": 0.821, + "args": { + "External id": 983162,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289790.960, "dur": 0.965, + "args": { + "External id": 983163,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "21627904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289803.759, "dur": 0.712, + "args": { + "External id": 983164,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "28967936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289818.036, "dur": 0.838, + "args": { + "External id": 983165,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "36307968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289830.288, "dur": 0.790, + "args": { + "External id": 983166,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289845.231, "dur": 0.751, + "args": { + "External id": 983167,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "43648512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289857.872, "dur": 0.939, + "args": { + "External id": 983168,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "45745664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289870.854, "dur": 0.755, + "args": { + "External id": 983169,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "46269952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289882.071, "dur": 1.420, + "args": { + "External id": 983170,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "46794240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289895.434, "dur": 1.100, + "args": { + "External id": 983171,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289909.443, "dur": 0.940, + "args": { + "External id": 983172,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "48891904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289922.818, "dur": 0.906, + "args": { + "External id": 983173,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "56231936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289934.609, "dur": 0.915, + "args": { + "External id": 983174,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "63571968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289946.079, "dur": 0.819, + "args": { + "External id": 983175,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289958.904, "dur": 3.047, + "args": { + "External id": 983176,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "70912512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289975.997, "dur": 1.147, + "args": { + "External id": 983177,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73009664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939289987.011, "dur": 1.009, + "args": { + "External id": 983178,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73533952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290026.434, "dur": 2.552, + "args": { + "External id": 983179,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "74058240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290047.829, "dur": 1.023, + "args": { + "External id": 983180,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290103.120, "dur": 1.693, + "args": { + "External id": 983181,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "76155904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290118.564, "dur": 1.197, + "args": { + "External id": 983182,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "83495936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290130.086, "dur": 1.135, + "args": { + "External id": 983183,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "90835968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290142.815, "dur": 2.673, + "args": { + "External id": 983184,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290158.682, "dur": 1.047, + "args": { + "External id": 983185,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "98176512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290170.004, "dur": 0.931, + "args": { + "External id": 983186,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100273664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290183.344, "dur": 1.157, + "args": { + "External id": 983187,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100797952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290194.500, "dur": 1.555, + "args": { + "External id": 983188,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "101322240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290208.273, "dur": 0.755, + "args": { + "External id": 983189,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290218.824, "dur": 2.731, + "args": { + "External id": 983190,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "103419904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290232.151, "dur": 0.818, + "args": { + "External id": 983191,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "110759936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290246.177, "dur": 1.053, + "args": { + "External id": 983192,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "118099968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290260.802, "dur": 0.868, + "args": { + "External id": 983193,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345939290271.793, "dur": 0.897, + "args": { + "External id": 983194,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "125440512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 5785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940426660.984, "dur": 119.574, + "args": { + "External id": 983195,"Record function id": 0, "Sequence number": 10552697, "Fwd thread id": 1, "Ev Idx": 5786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940426671.075, "dur": 99.125, + "args": { + "External id": 983196,"Sequence number": 10552697, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5787 + } + }, + { + "ph": "f", "id": 225, "pid": 2338708, "tid": 2379421, "ts": 6345940426671.075, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338708, "tid": 2379421, + "ts": 6345940426680.276, "dur": 88.327, + "args": { + "External id": 983197,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940426790.423, "dur": 315.102, + "args": { + "External id": 983198,"Record function id": 0, "Ev Idx": 5789 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940426866.178, "dur": 96.529, + "args": { + "External id": 983199,"Record function id": 0, "Ev Idx": 5790 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2338708, "tid": 2379421, + "ts": 6345940426896.511, "dur": 52.788, + "args": { + "External id": 983200,"Record function id": 0, "Ev Idx": 5791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940426968.713, "dur": 3.006, + "args": { + "External id": 983201,"Sequence number": 10552696, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5792 + } + }, + { + "ph": "f", "id": 226, "pid": 2338708, "tid": 2379421, "ts": 6345940426968.713, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940426976.919, "dur": 120.619, + "args": { + "External id": 983202,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940426982.783, "dur": 70.263, + "args": { + "External id": 983203,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940426994.289, "dur": 3.423, + "args": { + "External id": 983204,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940427120.134, "dur": 37497.903, + "args": { + "External id": 983205,"Record function id": 0, "Sequence number": 10552694, "Fwd thread id": 1, "Ev Idx": 5796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940427123.113, "dur": 37477.723, + "args": { + "External id": 983206,"Sequence number": 10552694, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5797 + } + }, + { + "ph": "f", "id": 227, "pid": 2338708, "tid": 2379421, "ts": 6345940427123.113, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940427172.046, "dur": 6.817, + "args": { + "External id": 983207,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940427184.336, "dur": 37102.202, + "args": { + "External id": 983208,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940427186.682, "dur": 37099.337, + "args": { + "External id": 983209,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940427191.118, "dur": 7.626, + "args": { + "External id": 983210,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940427200.848, "dur": 37083.488, + "args": { + "External id": 983211,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338708, "tid": 2379421, + "ts": 6345940464292.293, "dur": 0.775, + "args": { + "External id": 983212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345940464295.537, "dur": 3.226, + "args": { + "External id": 983213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345940464297.218, "dur": 1.382, + "args": { + "External id": 983214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2379421, + "ts": 6345940464305.637, "dur": 38.874, + "args": { + "External id": 983215,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338708, "tid": 2379421, + "ts": 6345940464354.681, "dur": 52.212, + "args": { + "External id": 983216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2379421, + "ts": 6345940464356.833, "dur": 49.726, + "args": { + "External id": 983217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2379421, + "ts": 6345940464358.727, "dur": 47.569, + "args": { + "External id": 983218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940464634.178, "dur": 20.309, + "args": { + "External id": 983219,"Record function id": 0, "Sequence number": 10552693, "Fwd thread id": 1, "Ev Idx": 5810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940464636.541, "dur": 15.072, + "args": { + "External id": 983220,"Sequence number": 10552693, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5811 + } + }, + { + "ph": "f", "id": 228, "pid": 2338708, "tid": 2379421, "ts": 6345940464636.541, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940464642.591, "dur": 8.706, + "args": { + "External id": 983221,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940464644.623, "dur": 6.418, + "args": { + "External id": 983222,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940464659.437, "dur": 122.454, + "args": { + "External id": 983223,"Record function id": 0, "Sequence number": 10552692, "Fwd thread id": 1, "Ev Idx": 5814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940464660.996, "dur": 113.112, + "args": { + "External id": 983224,"Sequence number": 10552692, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5815 + } + }, + { + "ph": "f", "id": 229, "pid": 2338708, "tid": 2379421, "ts": 6345940464660.996, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940464665.594, "dur": 107.978, + "args": { + "External id": 983225,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940464672.655, "dur": 40.607, + "args": { + "External id": 983226,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940464678.964, "dur": 6.397, + "args": { + "External id": 983227,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940464687.260, "dur": 25.516, + "args": { + "External id": 983228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940464690.899, "dur": 21.251, + "args": { + "External id": 983229,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940464715.913, "dur": 7.778, + "args": { + "External id": 983230,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940464721.079, "dur": 2.060, + "args": { + "External id": 983231,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940464725.873, "dur": 46.405, + "args": { + "External id": 983232,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940464787.766, "dur": 73.014, + "args": { + "External id": 983233,"Record function id": 0, "Sequence number": 10552691, "Fwd thread id": 1, "Ev Idx": 5824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940464789.297, "dur": 67.919, + "args": { + "External id": 983234,"Sequence number": 10552691, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5825 + } + }, + { + "ph": "f", "id": 230, "pid": 2338708, "tid": 2379421, "ts": 6345940464789.297, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940464793.486, "dur": 63.408, + "args": { + "External id": 983235,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "3"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940464798.397, "dur": 25.757, + "args": { + "External id": 983236,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940464799.843, "dur": 3.207, + "args": { + "External id": 983237,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940464804.035, "dur": 19.732, + "args": { + "External id": 983238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940464809.437, "dur": 13.925, + "args": { + "External id": 983239,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345940464826.121, "dur": 8.489, + "args": { + "External id": 983240,"Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940464832.390, "dur": 1.384, + "args": { + "External id": 983241,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940464835.973, "dur": 20.160, + "args": { + "External id": 983242,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940464865.908, "dur": 293.198, + "args": { + "External id": 983243,"Record function id": 0, "Sequence number": 10552690, "Fwd thread id": 1, "Ev Idx": 5834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940464867.324, "dur": 284.922, + "args": { + "External id": 983244,"Sequence number": 10552690, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5835 + } + }, + { + "ph": "f", "id": 231, "pid": 2338708, "tid": 2379421, "ts": 6345940464867.324, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940464869.865, "dur": 281.748, + "args": { + "External id": 983245,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940464871.674, "dur": 24.899, + "args": { + "External id": 983246,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940464873.281, "dur": 2.775, + "args": { + "External id": 983247,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940464876.833, "dur": 19.388, + "args": { + "External id": 983248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940464881.109, "dur": 14.615, + "args": { + "External id": 983249,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940464897.717, "dur": 5.195, + "args": { + "External id": 983250,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940464901.024, "dur": 1.624, + "args": { + "External id": 983251,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940464904.488, "dur": 245.052, + "args": { + "External id": 983252,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940465168.569, "dur": 115.798, + "args": { + "External id": 983253,"Record function id": 0, "Sequence number": 10552689, "Fwd thread id": 1, "Ev Idx": 5844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940465170.016, "dur": 109.515, + "args": { + "External id": 983254,"Sequence number": 10552689, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5845 + } + }, + { + "ph": "f", "id": 232, "pid": 2338708, "tid": 2379421, "ts": 6345940465170.016, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940465173.056, "dur": 105.970, + "args": { + "External id": 983255,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940465175.076, "dur": 25.277, + "args": { + "External id": 983256,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940465176.860, "dur": 3.551, + "args": { + "External id": 983257,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940465181.408, "dur": 18.607, + "args": { + "External id": 983258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940465183.792, "dur": 15.648, + "args": { + "External id": 983259,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940465203.093, "dur": 7.021, + "args": { + "External id": 983260,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940465206.296, "dur": 3.480, + "args": { + "External id": 983261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940465217.023, "dur": 60.841, + "args": { + "External id": 983262,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940465291.053, "dur": 44.585, + "args": { + "External id": 983263,"Record function id": 0, "Sequence number": 10552688, "Fwd thread id": 1, "Ev Idx": 5854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940465292.830, "dur": 1.431, + "args": { + "External id": 983264,"Sequence number": 10552688, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5855 + } + }, + { + "ph": "f", "id": 233, "pid": 2338708, "tid": 2379421, "ts": 6345940465292.830, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940465297.191, "dur": 34.557, + "args": { + "External id": 983265,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940465300.169, "dur": 31.126, + "args": { + "External id": 983266,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940465308.213, "dur": 1.052, + "args": { + "External id": 983267,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940465342.678, "dur": 2351.339, + "args": { + "External id": 983268,"Record function id": 0, "Sequence number": 10552686, "Fwd thread id": 1, "Ev Idx": 5859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940465344.896, "dur": 2301.554, + "args": { + "External id": 983269,"Sequence number": 10552686, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5860 + } + }, + { + "ph": "f", "id": 234, "pid": 2338708, "tid": 2379421, "ts": 6345940465344.896, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940465389.349, "dur": 3.500, + "args": { + "External id": 983270,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940465399.493, "dur": 1994.780, + "args": { + "External id": 983271,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940465401.570, "dur": 1992.352, + "args": { + "External id": 983272,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940465406.219, "dur": 5.123, + "args": { + "External id": 983273,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940465412.674, "dur": 1979.802, + "args": { + "External id": 983274,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338708, "tid": 2379421, + "ts": 6345940467399.007, "dur": 0.525, + "args": { + "External id": 983275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345940467401.209, "dur": 3.380, + "args": { + "External id": 983276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345940467403.299, "dur": 1.130, + "args": { + "External id": 983277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2379421, + "ts": 6345940467409.728, "dur": 29.369, + "args": { + "External id": 983278,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338708, "tid": 2379421, + "ts": 6345940467445.867, "dur": 47.233, + "args": { + "External id": 983279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2379421, + "ts": 6345940467447.553, "dur": 45.274, + "args": { + "External id": 983280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2379421, + "ts": 6345940467449.268, "dur": 43.128, + "args": { + "External id": 983281,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940467664.323, "dur": 25.381, + "args": { + "External id": 983282,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940467706.563, "dur": 16.894, + "args": { + "External id": 983283,"Record function id": 0, "Sequence number": 10552685, "Fwd thread id": 1, "Ev Idx": 5874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940467708.434, "dur": 12.346, + "args": { + "External id": 983284,"Sequence number": 10552685, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5875 + } + }, + { + "ph": "f", "id": 235, "pid": 2338708, "tid": 2379421, "ts": 6345940467708.434, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940467713.314, "dur": 7.211, + "args": { + "External id": 983285,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940467715.373, "dur": 4.966, + "args": { + "External id": 983286,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940467728.036, "dur": 91.723, + "args": { + "External id": 983287,"Record function id": 0, "Sequence number": 10552684, "Fwd thread id": 1, "Ev Idx": 5878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940467729.210, "dur": 84.096, + "args": { + "External id": 983288,"Sequence number": 10552684, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5879 + } + }, + { + "ph": "f", "id": 236, "pid": 2338708, "tid": 2379421, "ts": 6345940467729.210, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940467732.409, "dur": 80.462, + "args": { + "External id": 983289,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940467735.985, "dur": 30.766, + "args": { + "External id": 983290,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940467741.087, "dur": 4.281, + "args": { + "External id": 983291,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940467746.601, "dur": 19.806, + "args": { + "External id": 983292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940467749.030, "dur": 16.860, + "args": { + "External id": 983293,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940467768.616, "dur": 5.731, + "args": { + "External id": 983294,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940467772.290, "dur": 1.633, + "args": { + "External id": 983295,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940467775.393, "dur": 36.540, + "args": { + "External id": 983296,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940467824.730, "dur": 70.567, + "args": { + "External id": 983297,"Record function id": 0, "Sequence number": 10552683, "Fwd thread id": 1, "Ev Idx": 5888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940467826.241, "dur": 65.910, + "args": { + "External id": 983298,"Sequence number": 10552683, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5889 + } + }, + { + "ph": "f", "id": 237, "pid": 2338708, "tid": 2379421, "ts": 6345940467826.241, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940467829.530, "dur": 62.209, + "args": { + "External id": 983299,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "2"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940467832.072, "dur": 24.115, + "args": { + "External id": 983300,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940467833.764, "dur": 2.926, + "args": { + "External id": 983301,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940467837.539, "dur": 18.335, + "args": { + "External id": 983302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940467841.535, "dur": 13.826, + "args": { + "External id": 983303,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345940467864.090, "dur": 7.814, + "args": { + "External id": 983304,"Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940467869.941, "dur": 1.263, + "args": { + "External id": 983305,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940467873.110, "dur": 17.847, + "args": { + "External id": 983306,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940467899.730, "dur": 198.569, + "args": { + "External id": 983307,"Record function id": 0, "Sequence number": 10552682, "Fwd thread id": 1, "Ev Idx": 5898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940467901.050, "dur": 190.993, + "args": { + "External id": 983308,"Sequence number": 10552682, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5899 + } + }, + { + "ph": "f", "id": 238, "pid": 2338708, "tid": 2379421, "ts": 6345940467901.050, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940467903.690, "dur": 187.688, + "args": { + "External id": 983309,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940467905.453, "dur": 22.766, + "args": { + "External id": 983310,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940467907.104, "dur": 2.390, + "args": { + "External id": 983311,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940467910.190, "dur": 17.714, + "args": { + "External id": 983312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940467914.123, "dur": 13.357, + "args": { + "External id": 983313,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940467929.465, "dur": 7.280, + "args": { + "External id": 983314,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940467932.742, "dur": 3.693, + "args": { + "External id": 983315,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940467937.831, "dur": 151.464, + "args": { + "External id": 983316,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940468109.708, "dur": 150.970, + "args": { + "External id": 983317,"Record function id": 0, "Sequence number": 10552681, "Fwd thread id": 1, "Ev Idx": 5908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940468111.079, "dur": 124.286, + "args": { + "External id": 983318,"Sequence number": 10552681, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5909 + } + }, + { + "ph": "f", "id": 239, "pid": 2338708, "tid": 2379421, "ts": 6345940468111.079, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940468114.123, "dur": 120.891, + "args": { + "External id": 983319,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940468116.292, "dur": 29.854, + "args": { + "External id": 983320,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940468118.419, "dur": 3.440, + "args": { + "External id": 983321,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940468122.559, "dur": 23.244, + "args": { + "External id": 983322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940468123.614, "dur": 21.593, + "args": { + "External id": 983323,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940468149.977, "dur": 3.860, + "args": { + "External id": 983324,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940468152.705, "dur": 0.793, + "args": { + "External id": 983325,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940468154.791, "dur": 79.168, + "args": { + "External id": 983326,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940468241.001, "dur": 17.777, + "args": { + "External id": 983327,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940468266.294, "dur": 41.146, + "args": { + "External id": 983328,"Record function id": 0, "Sequence number": 10552680, "Fwd thread id": 1, "Ev Idx": 5919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940468267.766, "dur": 1.204, + "args": { + "External id": 983329,"Sequence number": 10552680, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5920 + } + }, + { + "ph": "f", "id": 240, "pid": 2338708, "tid": 2379421, "ts": 6345940468267.766, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940468271.965, "dur": 32.433, + "args": { + "External id": 983330,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940468274.839, "dur": 29.066, + "args": { + "External id": 983331,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940468281.965, "dur": 1.025, + "args": { + "External id": 983332,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940468313.993, "dur": 3339.092, + "args": { + "External id": 983333,"Record function id": 0, "Sequence number": 10552678, "Fwd thread id": 1, "Ev Idx": 5924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940468316.061, "dur": 3300.076, + "args": { + "External id": 983334,"Sequence number": 10552678, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5925 + } + }, + { + "ph": "f", "id": 241, "pid": 2338708, "tid": 2379421, "ts": 6345940468316.061, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940468356.599, "dur": 2.908, + "args": { + "External id": 983335,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940468362.642, "dur": 3019.119, + "args": { + "External id": 983336,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940468364.570, "dur": 3016.840, + "args": { + "External id": 983337,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940468368.094, "dur": 5.091, + "args": { + "External id": 983338,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940468376.769, "dur": 3003.399, + "args": { + "External id": 983339,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338708, "tid": 2379421, + "ts": 6345940471386.189, "dur": 0.592, + "args": { + "External id": 983340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471388.697, "dur": 3.240, + "args": { + "External id": 983341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471390.351, "dur": 1.435, + "args": { + "External id": 983342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2379421, + "ts": 6345940471397.311, "dur": 27.464, + "args": { + "External id": 983343,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338708, "tid": 2379421, + "ts": 6345940471431.315, "dur": 43.033, + "args": { + "External id": 983344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2379421, + "ts": 6345940471433.036, "dur": 41.089, + "args": { + "External id": 983345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2379421, + "ts": 6345940471434.597, "dur": 39.206, + "args": { + "External id": 983346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471630.045, "dur": 17.901, + "args": { + "External id": 983347,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 5938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940471665.435, "dur": 17.967, + "args": { + "External id": 983348,"Record function id": 0, "Sequence number": 10552677, "Fwd thread id": 1, "Ev Idx": 5939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940471669.942, "dur": 11.118, + "args": { + "External id": 983349,"Sequence number": 10552677, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 5940 + } + }, + { + "ph": "f", "id": 242, "pid": 2338708, "tid": 2379421, "ts": 6345940471669.942, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940471674.004, "dur": 6.797, + "args": { + "External id": 983350,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940471675.962, "dur": 4.680, + "args": { + "External id": 983351,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 5942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940471687.675, "dur": 81.098, + "args": { + "External id": 983352,"Record function id": 0, "Sequence number": 10552676, "Fwd thread id": 1, "Ev Idx": 5943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940471688.867, "dur": 74.691, + "args": { + "External id": 983353,"Sequence number": 10552676, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5944 + } + }, + { + "ph": "f", "id": 243, "pid": 2338708, "tid": 2379421, "ts": 6345940471688.867, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940471691.775, "dur": 71.338, + "args": { + "External id": 983354,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 5945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940471695.664, "dur": 27.354, + "args": { + "External id": 983355,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940471697.991, "dur": 3.907, + "args": { + "External id": 983356,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471703.195, "dur": 19.461, + "args": { + "External id": 983357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471704.723, "dur": 17.379, + "args": { + "External id": 983358,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 5949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940471725.117, "dur": 7.324, + "args": { + "External id": 983359,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 5950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940471728.455, "dur": 3.586, + "args": { + "External id": 983360,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471735.783, "dur": 26.489, + "args": { + "External id": 983361,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940471773.954, "dur": 62.310, + "args": { + "External id": 983362,"Record function id": 0, "Sequence number": 10552675, "Fwd thread id": 1, "Ev Idx": 5953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940471775.189, "dur": 57.952, + "args": { + "External id": 983363,"Sequence number": 10552675, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 5954 + } + }, + { + "ph": "f", "id": 244, "pid": 2338708, "tid": 2379421, "ts": 6345940471775.189, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940471778.629, "dur": 54.160, + "args": { + "External id": 983364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 5955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940471781.593, "dur": 19.065, + "args": { + "External id": 983365,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940471782.916, "dur": 2.878, + "args": { + "External id": 983366,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471786.430, "dur": 13.936, + "args": { + "External id": 983367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471787.457, "dur": 12.492, + "args": { + "External id": 983368,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345940471802.225, "dur": 10.129, + "args": { + "External id": 983369,"Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 5960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940471810.479, "dur": 1.178, + "args": { + "External id": 983370,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471813.244, "dur": 18.896, + "args": { + "External id": 983371,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 5962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940471840.924, "dur": 130.521, + "args": { + "External id": 983372,"Record function id": 0, "Sequence number": 10552674, "Fwd thread id": 1, "Ev Idx": 5963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940471842.350, "dur": 125.037, + "args": { + "External id": 983373,"Sequence number": 10552674, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5964 + } + }, + { + "ph": "f", "id": 245, "pid": 2338708, "tid": 2379421, "ts": 6345940471842.350, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940471844.749, "dur": 122.061, + "args": { + "External id": 983374,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940471846.259, "dur": 19.305, + "args": { + "External id": 983375,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940471847.655, "dur": 2.183, + "args": { + "External id": 983376,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471850.607, "dur": 14.633, + "args": { + "External id": 983377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471851.449, "dur": 13.374, + "args": { + "External id": 983378,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940471866.577, "dur": 3.989, + "args": { + "External id": 983379,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940471869.422, "dur": 0.904, + "args": { + "External id": 983380,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471874.034, "dur": 91.654, + "args": { + "External id": 983381,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940471976.718, "dur": 176.196, + "args": { + "External id": 983382,"Record function id": 0, "Sequence number": 10552673, "Fwd thread id": 1, "Ev Idx": 5973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940471977.803, "dur": 148.207, + "args": { + "External id": 983383,"Sequence number": 10552673, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5974 + } + }, + { + "ph": "f", "id": 246, "pid": 2338708, "tid": 2379421, "ts": 6345940471977.803, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940471980.062, "dur": 145.508, + "args": { + "External id": 983384,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 5975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940471981.688, "dur": 22.228, + "args": { + "External id": 983385,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 5976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940471983.070, "dur": 2.500, + "args": { + "External id": 983386,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471986.346, "dur": 17.226, + "args": { + "External id": 983387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 5978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940471987.477, "dur": 15.653, + "args": { + "External id": 983388,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 5979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940472005.444, "dur": 21.742, + "args": { + "External id": 983389,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 5980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940472025.361, "dur": 1.135, + "args": { + "External id": 983390,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 5981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940472028.082, "dur": 95.634, + "args": { + "External id": 983391,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940472133.614, "dur": 17.710, + "args": { + "External id": 983392,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 5983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940472163.447, "dur": 47.483, + "args": { + "External id": 983393,"Record function id": 0, "Sequence number": 10552672, "Fwd thread id": 1, "Ev Idx": 5984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940472164.929, "dur": 1.394, + "args": { + "External id": 983394,"Sequence number": 10552672, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 5985 + } + }, + { + "ph": "f", "id": 247, "pid": 2338708, "tid": 2379421, "ts": 6345940472164.929, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940472168.761, "dur": 36.340, + "args": { + "External id": 983395,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 5986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940472171.877, "dur": 32.747, + "args": { + "External id": 983396,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[1], [], [], []], "Ev Idx": 5987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940472178.800, "dur": 3.218, + "args": { + "External id": 983397,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 5988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940472216.682, "dur": 3403.824, + "args": { + "External id": 983398,"Record function id": 0, "Sequence number": 10552671, "Fwd thread id": 1, "Ev Idx": 5989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940472230.626, "dur": 3351.256, + "args": { + "External id": 983399,"Sequence number": 10552671, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5990 + } + }, + { + "ph": "f", "id": 248, "pid": 2338708, "tid": 2379421, "ts": 6345940472230.626, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940472265.843, "dur": 3.298, + "args": { + "External id": 983400,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940472272.148, "dur": 3081.134, + "args": { + "External id": 983401,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940472273.936, "dur": 3079.017, + "args": { + "External id": 983402,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 5993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940472277.591, "dur": 4.720, + "args": { + "External id": 983403,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940472283.636, "dur": 3068.032, + "args": { + "External id": 983404,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 5995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2338708, "tid": 2379421, + "ts": 6345940475357.969, "dur": 0.453, + "args": { + "External id": 983405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345940475360.008, "dur": 6.017, + "args": { + "External id": 983406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2338708, "tid": 2379421, + "ts": 6345940475364.686, "dur": 1.065, + "args": { + "External id": 983407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 5998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2379421, + "ts": 6345940475370.794, "dur": 24.715, + "args": { + "External id": 983408,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 5999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338708, "tid": 2379421, + "ts": 6345940475401.625, "dur": 41.665, + "args": { + "External id": 983409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2379421, + "ts": 6345940475403.003, "dur": 40.077, + "args": { + "External id": 983410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2379421, + "ts": 6345940475404.837, "dur": 37.884, + "args": { + "External id": 983411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940475596.067, "dur": 19.246, + "args": { + "External id": 983412,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 6003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940475636.795, "dur": 16.325, + "args": { + "External id": 983413,"Record function id": 0, "Ev Idx": 6004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940475639.939, "dur": 11.038, + "args": { + "External id": 983414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940475643.768, "dur": 6.321, + "args": { + "External id": 983415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940475645.687, "dur": 4.281, + "args": { + "External id": 983416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 6007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940475657.302, "dur": 16.686, + "args": { + "External id": 983417,"Record function id": 0, "Sequence number": 10552670, "Fwd thread id": 1, "Ev Idx": 6008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940475658.493, "dur": 12.668, + "args": { + "External id": 983418,"Sequence number": 10552670, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6009 + } + }, + { + "ph": "f", "id": 249, "pid": 2338708, "tid": 2379421, "ts": 6345940475658.493, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940475662.173, "dur": 8.669, + "args": { + "External id": 983419,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940475666.984, "dur": 3.637, + "args": { + "External id": 983420,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940475677.998, "dur": 81.784, + "args": { + "External id": 983421,"Record function id": 0, "Sequence number": 10552669, "Fwd thread id": 1, "Ev Idx": 6012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940475679.245, "dur": 75.693, + "args": { + "External id": 983422,"Sequence number": 10552669, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6013 + } + }, + { + "ph": "f", "id": 250, "pid": 2338708, "tid": 2379421, "ts": 6345940475679.245, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940475682.062, "dur": 72.221, + "args": { + "External id": 983423,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940475685.811, "dur": 28.562, + "args": { + "External id": 983424,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940475688.388, "dur": 4.331, + "args": { + "External id": 983425,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940475693.782, "dur": 20.261, + "args": { + "External id": 983426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940475696.052, "dur": 17.496, + "args": { + "External id": 983427,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940475716.328, "dur": 5.449, + "args": { + "External id": 983428,"Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 6019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940475719.931, "dur": 1.531, + "args": { + "External id": 983429,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[16777216, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 6020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940475722.989, "dur": 30.178, + "args": { + "External id": 983430,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940475764.604, "dur": 68.364, + "args": { + "External id": 983431,"Record function id": 0, "Sequence number": 10552668, "Fwd thread id": 1, "Ev Idx": 6022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SelectBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940475768.935, "dur": 60.255, + "args": { + "External id": 983432,"Sequence number": 10552668, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6023 + } + }, + { + "ph": "f", "id": 251, "pid": 2338708, "tid": 2379421, "ts": 6345940475768.935, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940475771.508, "dur": 57.325, + "args": { + "External id": 983433,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "2", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 6024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940475774.122, "dur": 26.885, + "args": { + "External id": 983434,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940475775.584, "dur": 5.678, + "args": { + "External id": 983435,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940475781.939, "dur": 18.753, + "args": { + "External id": 983436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940475783.155, "dur": 17.053, + "args": { + "External id": 983437,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345940475802.504, "dur": 6.764, + "args": { + "External id": 983438,"Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940475807.442, "dur": 1.133, + "args": { + "External id": 983439,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940475810.062, "dur": 18.063, + "args": { + "External id": 983440,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940475837.517, "dur": 142.605, + "args": { + "External id": 983441,"Record function id": 0, "Sequence number": 10552667, "Fwd thread id": 1, "Ev Idx": 6032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940475841.297, "dur": 134.949, + "args": { + "External id": 983442,"Sequence number": 10552667, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6033 + } + }, + { + "ph": "f", "id": 252, "pid": 2338708, "tid": 2379421, "ts": 6345940475841.297, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940475843.669, "dur": 132.116, + "args": { + "External id": 983443,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 6034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940475845.522, "dur": 26.818, + "args": { + "External id": 983444,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940475847.113, "dur": 2.289, + "args": { + "External id": 983445,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940475855.691, "dur": 16.334, + "args": { + "External id": 983446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940475858.990, "dur": 12.566, + "args": { + "External id": 983447,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940475873.437, "dur": 3.950, + "args": { + "External id": 983448,"Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 6039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940475876.339, "dur": 0.779, + "args": { + "External id": 983449,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940475878.277, "dur": 96.295, + "args": { + "External id": 983450,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940475987.172, "dur": 207.536, + "args": { + "External id": 983451,"Record function id": 0, "Sequence number": 10552666, "Fwd thread id": 1, "Ev Idx": 6042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SliceBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940475988.516, "dur": 175.687, + "args": { + "External id": 983452,"Sequence number": 10552666, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6043 + } + }, + { + "ph": "f", "id": 253, "pid": 2338708, "tid": 2379421, "ts": 6345940475988.516, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940475990.733, "dur": 172.951, + "args": { + "External id": 983453,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], [], []], "Ev Idx": 6044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2379421, + "ts": 6345940475995.115, "dur": 39.234, + "args": { + "External id": 983454,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 6045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940475996.657, "dur": 2.886, + "args": { + "External id": 983455,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4, 4096]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2379421, + "ts": 6345940476000.388, "dur": 33.626, + "args": { + "External id": 983456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2379421, + "ts": 6345940476001.708, "dur": 31.169, + "args": { + "External id": 983457,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], []], "Ev Idx": 6048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940476036.434, "dur": 4.665, + "args": { + "External id": 983458,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 6049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940476039.591, "dur": 1.106, + "args": { + "External id": 983459,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940476042.087, "dur": 119.776, + "args": { + "External id": 983460,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940476172.548, "dur": 19.327, + "args": { + "External id": 983461,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [67108864, 16384, 4096, 1], []], "Input Dims": [[8, 4096, 4, 4096], [8, 4096, 4, 4096], []], "Ev Idx": 6052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940476204.613, "dur": 383.890, + "args": { + "External id": 983462,"Record function id": 0, "Sequence number": 10552665, "Fwd thread id": 1, "Ev Idx": 6053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940476206.543, "dur": 373.183, + "args": { + "External id": 983463,"Sequence number": 10552665, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6054 + } + }, + { + "ph": "f", "id": 254, "pid": 2338708, "tid": 2379421, "ts": 6345940476206.543, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345940476391.793, "dur": 48.240, + "args": { + "External id": 983464,"kernel_hash": "csesqrbnxb6gkjrwgoohyamgdaghjz2d2andcfwzecbkqzeczzqz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "131072", "4096", "1", "993", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/se/csesqrbnxb6gkjrwgoohyamgdaghjz2d2andcfwzecbkqzeczzqz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [4096], [131072, 4096], [131072, 4096], [132, 4096], [131072], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2338708, "tid": 2379421, + "ts": 6345940476478.159, "dur": 29.169, + "args": { + "External id": 983465,"kernel_hash": "cgpnzfm4ww5f67uofcrd54t5w35w6y4yspbhmhqt5ddc6salf5zl", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/gp/cgpnzfm4ww5f67uofcrd54t5w35w6y4yspbhmhqt5ddc6salf5zl.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 6056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2338708, "tid": 2379421, + "ts": 6345940476530.797, "dur": 24.577, + "args": { + "External id": 983466,"kernel_hash": "cvj4y67mu47myxc3c6bg7waq6ihcppieaul2mb3dd66obpbk7cmj", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvj4y67mu47myxc3c6bg7waq6ihcppieaul2mb3dd66obpbk7cmj.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 6057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940476599.301, "dur": 13.046, + "args": { + "External id": 983467,"Record function id": 0, "Ev Idx": 6058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940476601.680, "dur": 9.835, + "args": { + "External id": 983468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940476605.525, "dur": 5.054, + "args": { + "External id": 983469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940476606.903, "dur": 3.537, + "args": { + "External id": 983470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: StackBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940476617.693, "dur": 42.353, + "args": { + "External id": 983471,"Record function id": 0, "Sequence number": 10552664, "Fwd thread id": 1, "Ev Idx": 6062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "StackBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940476618.557, "dur": 33.185, + "args": { + "External id": 983472,"Sequence number": 10552664, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 4096, 1]], "Input Dims": [[8, 4096, 4, 4096]], "Ev Idx": 6063 + } + }, + { + "ph": "f", "id": 255, "pid": 2338708, "tid": 2379421, "ts": 6345940476618.557, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345940476621.740, "dur": 10.973, + "args": { + "External id": 983473,"Record function id": 0, "Concrete Inputs": ["", "-2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940476628.637, "dur": 1.640, + "args": { + "External id": 983474,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345940476633.830, "dur": 4.915, + "args": { + "External id": 983475,"Record function id": 0, "Concrete Inputs": ["", "-2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940476636.631, "dur": 0.798, + "args": { + "External id": 983476,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345940476639.783, "dur": 3.765, + "args": { + "External id": 983477,"Record function id": 0, "Concrete Inputs": ["", "-2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940476642.115, "dur": 0.647, + "args": { + "External id": 983478,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2379421, + "ts": 6345940476644.439, "dur": 6.614, + "args": { + "External id": 983479,"Record function id": 0, "Concrete Inputs": ["", "-2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 6070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940476647.115, "dur": 3.064, + "args": { + "External id": 983480,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 6071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940476664.636, "dur": 6.405, + "args": { + "External id": 983481,"Record function id": 0, "Sequence number": 10552663, "Fwd thread id": 1, "Ev Idx": 6072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940476665.589, "dur": 1.515, + "args": { + "External id": 983482,"Sequence number": 10552663, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6073 + } + }, + { + "ph": "f", "id": 256, "pid": 2338708, "tid": 2379421, "ts": 6345940476665.589, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940476676.023, "dur": 667.978, + "args": { + "External id": 983483,"Record function id": 0, "Sequence number": 10552662, "Fwd thread id": 1, "Ev Idx": 6074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940476677.797, "dur": 649.941, + "args": { + "External id": 983484,"Sequence number": 10552662, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6075 + } + }, + { + "ph": "f", "id": 257, "pid": 2338708, "tid": 2379421, "ts": 6345940476677.797, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940476722.391, "dur": 13.813, + "args": { + "External id": 983485,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940476730.027, "dur": 5.797, + "args": { + "External id": 983486,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940476743.504, "dur": 8.440, + "args": { + "External id": 983487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940476746.165, "dur": 4.610, + "args": { + "External id": 983488,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940476749.369, "dur": 1.115, + "args": { + "External id": 983489,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2379421, + "ts": 6345940476756.344, "dur": 142.225, + "args": { + "External id": 983490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940476757.793, "dur": 7.432, + "args": { + "External id": 983491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940476758.728, "dur": 5.348, + "args": { + "External id": 983492,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940476763.552, "dur": 0.417, + "args": { + "External id": 983493,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2379421, + "ts": 6345940476769.771, "dur": 127.748, + "args": { + "External id": 983494,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940476771.930, "dur": 124.453, + "args": { + "External id": 983495,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345940476904.449, "dur": 6.216, + "args": { + "External id": 983496,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940476907.300, "dur": 3.207, + "args": { + "External id": 983497,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940476948.412, "dur": 7.097, + "args": { + "External id": 983498,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940476957.042, "dur": 3.779, + "args": { + "External id": 983499,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940476964.826, "dur": 4.543, + "args": { + "External id": 983500,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940477034.694, "dur": 4.513, + "args": { + "External id": 983501,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940477035.971, "dur": 2.901, + "args": { + "External id": 983502,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338708, "tid": 2379421, + "ts": 6345940477115.726, "dur": 185.708, + "args": { + "External id": 983503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345940477123.733, "dur": 10.465, + "args": { + "External id": 983504,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477130.378, "dur": 1.714, + "args": { + "External id": 983505,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940477136.577, "dur": 9.415, + "args": { + "External id": 983506,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477144.081, "dur": 0.778, + "args": { + "External id": 983507,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345940477148.923, "dur": 3.163, + "args": { + "External id": 983508,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477151.141, "dur": 0.447, + "args": { + "External id": 983509,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940477155.677, "dur": 4.393, + "args": { + "External id": 983510,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477158.113, "dur": 0.819, + "args": { + "External id": 983511,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940477166.078, "dur": 3.033, + "args": { + "External id": 983512,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477167.958, "dur": 0.741, + "args": { + "External id": 983513,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940477170.343, "dur": 8.352, + "args": { + "External id": 983514,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940477175.123, "dur": 3.340, + "args": { + "External id": 983515,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940477179.718, "dur": 4.322, + "args": { + "External id": 983516,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477181.506, "dur": 2.146, + "args": { + "External id": 983517,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940477184.938, "dur": 3.021, + "args": { + "External id": 983518,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940477185.971, "dur": 1.824, + "args": { + "External id": 983519,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345940477192.979, "dur": 92.634, + "args": { + "External id": 983520,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940477288.024, "dur": 1.721, + "args": { + "External id": 983521,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940477290.770, "dur": 5.025, + "args": { + "External id": 983522,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477294.036, "dur": 0.735, + "args": { + "External id": 983523,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940477298.786, "dur": 1.204, + "args": { + "External id": 983524,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940477359.553, "dur": 12.461, + "args": { + "External id": 983525,"Record function id": 0, "Ev Idx": 6116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940477362.215, "dur": 8.978, + "args": { + "External id": 983526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940477365.298, "dur": 4.725, + "args": { + "External id": 983527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940477366.910, "dur": 2.914, + "args": { + "External id": 983528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477377.008, "dur": 12.535, + "args": { + "External id": 983529,"Record function id": 0, "Sequence number": 10552661, "Fwd thread id": 1, "Ev Idx": 6120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477378.453, "dur": 7.953, + "args": { + "External id": 983530,"Sequence number": 10552661, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6121 + } + }, + { + "ph": "f", "id": 258, "pid": 2338708, "tid": 2379421, "ts": 6345940477378.453, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940477381.489, "dur": 4.683, + "args": { + "External id": 983531,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940477384.766, "dur": 1.244, + "args": { + "External id": 983532,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477394.373, "dur": 162.257, + "args": { + "External id": 983533,"Record function id": 0, "Sequence number": 10552660, "Fwd thread id": 1, "Ev Idx": 6124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477395.501, "dur": 152.263, + "args": { + "External id": 983534,"Sequence number": 10552660, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6125 + } + }, + { + "ph": "f", "id": 259, "pid": 2338708, "tid": 2379421, "ts": 6345940477395.501, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940477400.904, "dur": 5.782, + "args": { + "External id": 983535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940477402.922, "dur": 3.103, + "args": { + "External id": 983536,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477404.952, "dur": 0.807, + "args": { + "External id": 983537,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940477410.715, "dur": 61.640, + "args": { + "External id": 983538,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940477474.276, "dur": 5.011, + "args": { + "External id": 983539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940477475.099, "dur": 3.409, + "args": { + "External id": 983540,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477477.274, "dur": 1.034, + "args": { + "External id": 983541,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940477481.537, "dur": 6.954, + "args": { + "External id": 983542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940477482.726, "dur": 4.931, + "args": { + "External id": 983543,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477486.837, "dur": 0.726, + "args": { + "External id": 983544,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940477489.302, "dur": 57.451, + "args": { + "External id": 983545,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477562.449, "dur": 8.223, + "args": { + "External id": 983546,"Record function id": 0, "Sequence number": 10552659, "Fwd thread id": 1, "Ev Idx": 6137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477563.573, "dur": 4.616, + "args": { + "External id": 983547,"Sequence number": 10552659, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6138 + } + }, + { + "ph": "f", "id": 260, "pid": 2338708, "tid": 2379421, "ts": 6345940477563.573, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940477565.443, "dur": 2.553, + "args": { + "External id": 983548,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940477566.671, "dur": 1.174, + "args": { + "External id": 983549,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477575.407, "dur": 45.236, + "args": { + "External id": 983550,"Record function id": 0, "Sequence number": 10552658, "Fwd thread id": 1, "Ev Idx": 6141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477576.598, "dur": 41.314, + "args": { + "External id": 983551,"Sequence number": 10552658, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6142 + } + }, + { + "ph": "f", "id": 261, "pid": 2338708, "tid": 2379421, "ts": 6345940477576.598, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940477577.937, "dur": 39.746, + "args": { + "External id": 983552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940477612.347, "dur": 4.588, + "args": { + "External id": 988161,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477616.398, "dur": 0.437, + "args": { + "External id": 988162,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940477625.751, "dur": 6.470, + "args": { + "External id": 988163,"Record function id": 0, "Ev Idx": 6146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940477627.590, "dur": 4.004, + "args": { + "External id": 988164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940477629.245, "dur": 2.062, + "args": { + "External id": 988165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940477629.780, "dur": 1.407, + "args": { + "External id": 988166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477636.351, "dur": 7.701, + "args": { + "External id": 988167,"Record function id": 0, "Sequence number": 10552657, "Fwd thread id": 1, "Ev Idx": 6150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477638.004, "dur": 3.356, + "args": { + "External id": 988168,"Sequence number": 10552657, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6151 + } + }, + { + "ph": "f", "id": 262, "pid": 2338708, "tid": 2379421, "ts": 6345940477638.004, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940477639.085, "dur": 2.118, + "args": { + "External id": 988169,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940477640.054, "dur": 0.984, + "args": { + "External id": 988170,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477648.159, "dur": 123.408, + "args": { + "External id": 988171,"Record function id": 0, "Sequence number": 10552656, "Fwd thread id": 1, "Ev Idx": 6154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477648.970, "dur": 112.791, + "args": { + "External id": 988172,"Sequence number": 10552656, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6155 + } + }, + { + "ph": "f", "id": 263, "pid": 2338708, "tid": 2379421, "ts": 6345940477648.970, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940477652.945, "dur": 2.782, + "args": { + "External id": 988173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940477653.508, "dur": 1.684, + "args": { + "External id": 988174,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477654.579, "dur": 0.432, + "args": { + "External id": 988175,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940477656.354, "dur": 50.653, + "args": { + "External id": 988176,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940477708.423, "dur": 6.016, + "args": { + "External id": 988177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940477708.973, "dur": 4.757, + "args": { + "External id": 988178,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477712.938, "dur": 0.656, + "args": { + "External id": 988179,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940477715.569, "dur": 5.163, + "args": { + "External id": 988180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940477716.508, "dur": 3.726, + "args": { + "External id": 988181,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477717.807, "dur": 2.311, + "args": { + "External id": 988182,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940477721.314, "dur": 39.322, + "args": { + "External id": 988183,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477777.692, "dur": 36.802, + "args": { + "External id": 988184,"Record function id": 0, "Sequence number": 10552655, "Fwd thread id": 1, "Ev Idx": 6167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477778.708, "dur": 4.541, + "args": { + "External id": 988185,"Sequence number": 10552655, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6168 + } + }, + { + "ph": "f", "id": 264, "pid": 2338708, "tid": 2379421, "ts": 6345940477778.708, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940477780.527, "dur": 2.551, + "args": { + "External id": 988186,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940477781.439, "dur": 1.469, + "args": { + "External id": 988187,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345940477786.594, "dur": 24.618, + "args": { + "External id": 988188,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477821.908, "dur": 8.130, + "args": { + "External id": 988189,"Record function id": 0, "Sequence number": 10552654, "Fwd thread id": 1, "Ev Idx": 6172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940477823.154, "dur": 4.584, + "args": { + "External id": 988190,"Sequence number": 10552654, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6173 + } + }, + { + "ph": "f", "id": 265, "pid": 2338708, "tid": 2379421, "ts": 6345940477823.154, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940477823.836, "dur": 3.658, + "args": { + "External id": 988191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940477824.574, "dur": 2.294, + "args": { + "External id": 988192,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940477826.148, "dur": 0.576, + "args": { + "External id": 988193,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940477835.261, "dur": 5.816, + "args": { + "External id": 988194,"Record function id": 0, "Ev Idx": 6177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940477836.807, "dur": 3.682, + "args": { + "External id": 988195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940477838.258, "dur": 1.841, + "args": { + "External id": 988196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940477838.805, "dur": 1.123, + "args": { + "External id": 988197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940477846.521, "dur": 578.773, + "args": { + "External id": 988198,"Record function id": 0, "Sequence number": 10552653, "Fwd thread id": 1, "Ev Idx": 6181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940477848.221, "dur": 556.342, + "args": { + "External id": 988199,"Sequence number": 10552653, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6182 + } + }, + { + "ph": "f", "id": 266, "pid": 2338708, "tid": 2379421, "ts": 6345940477848.221, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2379421, + "ts": 6345940477877.333, "dur": 41.077, + "args": { + "External id": 988200,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345940477879.179, "dur": 38.998, + "args": { + "External id": 988201,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940477882.756, "dur": 9.677, + "args": { + "External id": 988202,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940477888.016, "dur": 3.697, + "args": { + "External id": 988203,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940477894.169, "dur": 23.406, + "args": { + "External id": 988204,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940477933.815, "dur": 2.916, + "args": { + "External id": 988205,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940477934.902, "dur": 1.654, + "args": { + "External id": 988206,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940477941.926, "dur": 3.876, + "args": { + "External id": 988207,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940477944.845, "dur": 0.843, + "args": { + "External id": 988208,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940477960.435, "dur": 4.515, + "args": { + "External id": 988209,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940477978.026, "dur": 3.332, + "args": { + "External id": 988210,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940478262.714, "dur": 3.971, + "args": { + "External id": 988211,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940478272.254, "dur": 46.844, + "args": { + "External id": 988212,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940478289.685, "dur": 1.270, + "args": { + "External id": 988213,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940478325.912, "dur": 38.118, + "args": { + "External id": 988214,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940478327.966, "dur": 35.821, + "args": { + "External id": 988215,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940478336.973, "dur": 5.505, + "args": { + "External id": 988216,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940478344.209, "dur": 19.027, + "args": { + "External id": 988217,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345940478369.338, "dur": 3.451, + "args": { + "External id": 988218,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940478370.934, "dur": 1.670, + "args": { + "External id": 988219,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940478380.914, "dur": 3.001, + "args": { + "External id": 988220,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940478382.203, "dur": 1.595, + "args": { + "External id": 988221,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940478386.856, "dur": 4.498, + "args": { + "External id": 988222,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940478390.224, "dur": 1.018, + "args": { + "External id": 988223,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940478447.090, "dur": 15.897, + "args": { + "External id": 988224,"Record function id": 0, "Ev Idx": 6207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940478450.953, "dur": 10.730, + "args": { + "External id": 988225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940478454.774, "dur": 4.969, + "args": { + "External id": 988226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940478456.478, "dur": 3.078, + "args": { + "External id": 988227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940478468.589, "dur": 12.920, + "args": { + "External id": 988228,"Record function id": 0, "Sequence number": 10552652, "Fwd thread id": 1, "Ev Idx": 6211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940478469.925, "dur": 8.020, + "args": { + "External id": 988229,"Sequence number": 10552652, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6212 + } + }, + { + "ph": "f", "id": 267, "pid": 2338708, "tid": 2379421, "ts": 6345940478469.925, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940478471.954, "dur": 5.702, + "args": { + "External id": 988230,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940478475.769, "dur": 1.747, + "args": { + "External id": 988231,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940478486.242, "dur": 183.492, + "args": { + "External id": 988232,"Record function id": 0, "Sequence number": 10552651, "Fwd thread id": 1, "Ev Idx": 6215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940478487.206, "dur": 176.071, + "args": { + "External id": 988233,"Sequence number": 10552651, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6216 + } + }, + { + "ph": "f", "id": 268, "pid": 2338708, "tid": 2379421, "ts": 6345940478487.206, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940478491.990, "dur": 6.793, + "args": { + "External id": 988234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940478494.310, "dur": 3.773, + "args": { + "External id": 988235,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940478496.765, "dur": 1.002, + "args": { + "External id": 988236,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940478500.336, "dur": 96.158, + "args": { + "External id": 988237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940478598.233, "dur": 6.621, + "args": { + "External id": 988238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940478601.230, "dur": 2.740, + "args": { + "External id": 988239,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940478602.993, "dur": 0.833, + "args": { + "External id": 988240,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940478606.598, "dur": 6.378, + "args": { + "External id": 988241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940478607.936, "dur": 4.461, + "args": { + "External id": 988242,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940478609.128, "dur": 3.149, + "args": { + "External id": 988243,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940478613.683, "dur": 48.598, + "args": { + "External id": 988244,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940478675.994, "dur": 10.449, + "args": { + "External id": 988245,"Record function id": 0, "Sequence number": 10552650, "Fwd thread id": 1, "Ev Idx": 6228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940478677.014, "dur": 7.070, + "args": { + "External id": 988246,"Sequence number": 10552650, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6229 + } + }, + { + "ph": "f", "id": 269, "pid": 2338708, "tid": 2379421, "ts": 6345940478677.014, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940478681.577, "dur": 2.342, + "args": { + "External id": 988247,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940478682.501, "dur": 1.243, + "args": { + "External id": 988248,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940478690.582, "dur": 9.168, + "args": { + "External id": 988249,"Record function id": 0, "Sequence number": 10552649, "Fwd thread id": 1, "Ev Idx": 6232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940478691.558, "dur": 5.460, + "args": { + "External id": 988250,"Sequence number": 10552649, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6233 + } + }, + { + "ph": "f", "id": 270, "pid": 2338708, "tid": 2379421, "ts": 6345940478691.558, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940478692.508, "dur": 4.274, + "args": { + "External id": 988251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940478693.351, "dur": 2.807, + "args": { + "External id": 988252,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940478695.416, "dur": 0.603, + "args": { + "External id": 988253,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940478704.580, "dur": 9.905, + "args": { + "External id": 988254,"Record function id": 0, "Ev Idx": 6237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940478706.804, "dur": 7.115, + "args": { + "External id": 988255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940478708.250, "dur": 5.274, + "args": { + "External id": 988256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940478711.689, "dur": 1.659, + "args": { + "External id": 988257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940478718.557, "dur": 7.567, + "args": { + "External id": 988258,"Record function id": 0, "Sequence number": 10552648, "Fwd thread id": 1, "Ev Idx": 6241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940478719.602, "dur": 4.040, + "args": { + "External id": 988259,"Sequence number": 10552648, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6242 + } + }, + { + "ph": "f", "id": 271, "pid": 2338708, "tid": 2379421, "ts": 6345940478719.602, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940478720.781, "dur": 2.693, + "args": { + "External id": 988260,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940478721.957, "dur": 1.399, + "args": { + "External id": 988261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940478731.544, "dur": 538.252, + "args": { + "External id": 988262,"Record function id": 0, "Sequence number": 10552647, "Fwd thread id": 1, "Ev Idx": 6245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940478733.314, "dur": 512.434, + "args": { + "External id": 988263,"Sequence number": 10552647, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6246 + } + }, + { + "ph": "f", "id": 272, "pid": 2338708, "tid": 2379421, "ts": 6345940478733.314, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940478752.770, "dur": 9.024, + "args": { + "External id": 988264,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940478755.972, "dur": 5.302, + "args": { + "External id": 988265,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940478764.391, "dur": 4.384, + "args": { + "External id": 988266,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940478766.423, "dur": 2.006, + "args": { + "External id": 988267,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940478770.437, "dur": 7.233, + "args": { + "External id": 988268,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940478772.310, "dur": 5.123, + "args": { + "External id": 988269,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940478821.414, "dur": 391.759, + "args": { + "External id": 988270,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940478925.838, "dur": 6.803, + "args": { + "External id": 988271,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940478935.180, "dur": 3.131, + "args": { + "External id": 988272,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940478939.934, "dur": 2.210, + "args": { + "External id": 988273,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940478943.704, "dur": 2.743, + "args": { + "External id": 988274,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940479006.742, "dur": 30.152, + "args": { + "External id": 988275,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940479033.569, "dur": 2.863, + "args": { + "External id": 988276,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940479039.365, "dur": 84.887, + "args": { + "External id": 988277,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940479047.650, "dur": 1.858, + "args": { + "External id": 988278,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940479130.484, "dur": 2.398, + "args": { + "External id": 988279,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940479131.615, "dur": 1.174, + "args": { + "External id": 988280,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940479134.070, "dur": 19.787, + "args": { + "External id": 988281,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940479139.620, "dur": 0.590, + "args": { + "External id": 988282,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940479229.795, "dur": 4.837, + "args": { + "External id": 988283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940479238.372, "dur": 0.977, + "args": { + "External id": 988284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940479241.751, "dur": 0.688, + "args": { + "External id": 988285,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940479286.844, "dur": 285.344, + "args": { + "External id": 988286,"Record function id": 0, "Sequence number": 10552646, "Fwd thread id": 1, "Ev Idx": 6269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940479289.440, "dur": 274.642, + "args": { + "External id": 988287,"Sequence number": 10552646, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6270 + } + }, + { + "ph": "f", "id": 273, "pid": 2338708, "tid": 2379421, "ts": 6345940479289.440, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345940479317.352, "dur": 61.151, + "args": { + "External id": 988288,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940479321.774, "dur": 5.940, + "args": { + "External id": 988289,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940479329.684, "dur": 48.252, + "args": { + "External id": 988290,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940479391.654, "dur": 6.607, + "args": { + "External id": 988291,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940479394.813, "dur": 3.101, + "args": { + "External id": 988292,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940479581.370, "dur": 206.918, + "args": { + "External id": 988293,"Record function id": 0, "Sequence number": 10552645, "Fwd thread id": 1, "Ev Idx": 6276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940479583.597, "dur": 197.251, + "args": { + "External id": 988294,"Sequence number": 10552645, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6277 + } + }, + { + "ph": "f", "id": 274, "pid": 2338708, "tid": 2379421, "ts": 6345940479583.597, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345940479600.914, "dur": 55.383, + "args": { + "External id": 988295,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940479604.640, "dur": 3.594, + "args": { + "External id": 988296,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940479609.668, "dur": 45.946, + "args": { + "External id": 988297,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940479665.454, "dur": 6.099, + "args": { + "External id": 988298,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940479668.102, "dur": 3.116, + "args": { + "External id": 988299,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940479796.506, "dur": 15.070, + "args": { + "External id": 988300,"Record function id": 0, "Sequence number": 10552644, "Fwd thread id": 1, "Ev Idx": 6283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940479798.194, "dur": 10.635, + "args": { + "External id": 988301,"Sequence number": 10552644, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6284 + } + }, + { + "ph": "f", "id": 275, "pid": 2338708, "tid": 2379421, "ts": 6345940479798.194, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940479801.695, "dur": 6.758, + "args": { + "External id": 988302,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940479803.404, "dur": 4.782, + "args": { + "External id": 988303,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940479815.883, "dur": 9.220, + "args": { + "External id": 988304,"Record function id": 0, "Sequence number": 10552643, "Fwd thread id": 1, "Ev Idx": 6287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940479816.994, "dur": 6.210, + "args": { + "External id": 988305,"Sequence number": 10552643, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6288 + } + }, + { + "ph": "f", "id": 276, "pid": 2338708, "tid": 2379421, "ts": 6345940479816.994, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940479818.596, "dur": 4.445, + "args": { + "External id": 988306,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940479821.991, "dur": 0.927, + "args": { + "External id": 988307,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940479829.250, "dur": 10.546, + "args": { + "External id": 988308,"Record function id": 0, "Sequence number": 10552642, "Fwd thread id": 1, "Ev Idx": 6291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940479834.065, "dur": 3.780, + "args": { + "External id": 988309,"Sequence number": 10552642, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6292 + } + }, + { + "ph": "f", "id": 277, "pid": 2338708, "tid": 2379421, "ts": 6345940479834.065, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940479835.622, "dur": 2.048, + "args": { + "External id": 988310,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940479836.694, "dur": 0.835, + "args": { + "External id": 988311,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940479845.573, "dur": 12.739, + "args": { + "External id": 988312,"Record function id": 0, "Sequence number": 10552641, "Fwd thread id": 1, "Ev Idx": 6295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940479846.678, "dur": 9.025, + "args": { + "External id": 988313,"Sequence number": 10552641, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6296 + } + }, + { + "ph": "f", "id": 278, "pid": 2338708, "tid": 2379421, "ts": 6345940479846.678, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940479848.180, "dur": 7.355, + "args": { + "External id": 988314,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940479851.417, "dur": 3.972, + "args": { + "External id": 988315,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940479862.602, "dur": 252.384, + "args": { + "External id": 988316,"Record function id": 0, "Sequence number": 10552640, "Fwd thread id": 1, "Ev Idx": 6299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940479863.878, "dur": 240.100, + "args": { + "External id": 988317,"Sequence number": 10552640, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6300 + } + }, + { + "ph": "f", "id": 279, "pid": 2338708, "tid": 2379421, "ts": 6345940479863.878, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940479868.773, "dur": 8.326, + "args": { + "External id": 988318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940479871.299, "dur": 4.832, + "args": { + "External id": 988319,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940479874.307, "dur": 1.457, + "args": { + "External id": 988320,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940479879.048, "dur": 84.885, + "args": { + "External id": 988321,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940479967.866, "dur": 6.051, + "args": { + "External id": 988322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940479969.219, "dur": 3.729, + "args": { + "External id": 988323,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940479972.086, "dur": 0.673, + "args": { + "External id": 988324,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940479976.005, "dur": 4.354, + "args": { + "External id": 988325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940479977.708, "dur": 2.066, + "args": { + "External id": 988326,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940479979.258, "dur": 0.423, + "args": { + "External id": 988327,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940479983.963, "dur": 117.622, + "args": { + "External id": 988328,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480126.255, "dur": 11.190, + "args": { + "External id": 988329,"Record function id": 0, "Sequence number": 10552639, "Fwd thread id": 1, "Ev Idx": 6312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480127.801, "dur": 6.964, + "args": { + "External id": 988330,"Sequence number": 10552639, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6313 + } + }, + { + "ph": "f", "id": 280, "pid": 2338708, "tid": 2379421, "ts": 6345940480127.801, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940480130.218, "dur": 4.340, + "args": { + "External id": 988331,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940480131.544, "dur": 2.821, + "args": { + "External id": 988332,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480142.035, "dur": 12.086, + "args": { + "External id": 988333,"Record function id": 0, "Sequence number": 10552638, "Fwd thread id": 1, "Ev Idx": 6316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480143.011, "dur": 8.317, + "args": { + "External id": 988334,"Sequence number": 10552638, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6317 + } + }, + { + "ph": "f", "id": 281, "pid": 2338708, "tid": 2379421, "ts": 6345940480143.011, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940480144.410, "dur": 6.662, + "args": { + "External id": 988335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940480145.440, "dur": 5.022, + "args": { + "External id": 988336,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940480149.479, "dur": 0.873, + "args": { + "External id": 988337,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940480161.275, "dur": 14.809, + "args": { + "External id": 988338,"Record function id": 0, "Ev Idx": 6321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940480163.233, "dur": 11.943, + "args": { + "External id": 988339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940480166.618, "dur": 8.103, + "args": { + "External id": 988340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940480168.474, "dur": 6.120, + "args": { + "External id": 988341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480180.215, "dur": 7.895, + "args": { + "External id": 988342,"Record function id": 0, "Sequence number": 10552637, "Fwd thread id": 1, "Ev Idx": 6325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480181.820, "dur": 3.780, + "args": { + "External id": 988343,"Sequence number": 10552637, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6326 + } + }, + { + "ph": "f", "id": 282, "pid": 2338708, "tid": 2379421, "ts": 6345940480181.820, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940480183.101, "dur": 2.311, + "args": { + "External id": 988344,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940480184.013, "dur": 1.242, + "args": { + "External id": 988345,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480192.835, "dur": 123.452, + "args": { + "External id": 988346,"Record function id": 0, "Sequence number": 10552636, "Fwd thread id": 1, "Ev Idx": 6329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480196.548, "dur": 111.774, + "args": { + "External id": 988347,"Sequence number": 10552636, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6330 + } + }, + { + "ph": "f", "id": 283, "pid": 2338708, "tid": 2379421, "ts": 6345940480196.548, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940480199.196, "dur": 4.356, + "args": { + "External id": 988348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940480200.118, "dur": 2.897, + "args": { + "External id": 988349,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940480202.059, "dur": 0.808, + "args": { + "External id": 988350,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940480204.344, "dur": 40.720, + "args": { + "External id": 988351,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940480246.348, "dur": 10.421, + "args": { + "External id": 988352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940480247.142, "dur": 8.780, + "args": { + "External id": 988353,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940480252.803, "dur": 2.901, + "args": { + "External id": 988354,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940480258.056, "dur": 4.483, + "args": { + "External id": 988355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940480259.589, "dur": 2.446, + "args": { + "External id": 988356,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940480261.210, "dur": 0.736, + "args": { + "External id": 988357,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940480263.315, "dur": 44.026, + "args": { + "External id": 988358,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480322.188, "dur": 42.790, + "args": { + "External id": 988359,"Record function id": 0, "Sequence number": 10552635, "Fwd thread id": 1, "Ev Idx": 6342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480323.112, "dur": 7.469, + "args": { + "External id": 988360,"Sequence number": 10552635, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6343 + } + }, + { + "ph": "f", "id": 284, "pid": 2338708, "tid": 2379421, "ts": 6345940480323.112, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940480325.260, "dur": 5.146, + "args": { + "External id": 988361,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940480328.759, "dur": 1.455, + "args": { + "External id": 988362,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345940480334.568, "dur": 27.874, + "args": { + "External id": 988363,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480369.966, "dur": 9.503, + "args": { + "External id": 988364,"Record function id": 0, "Sequence number": 10552634, "Fwd thread id": 1, "Ev Idx": 6347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480371.113, "dur": 6.576, + "args": { + "External id": 988365,"Sequence number": 10552634, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6348 + } + }, + { + "ph": "f", "id": 285, "pid": 2338708, "tid": 2379421, "ts": 6345940480371.113, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940480372.481, "dur": 4.979, + "args": { + "External id": 988366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940480373.928, "dur": 2.883, + "args": { + "External id": 988367,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940480375.854, "dur": 0.846, + "args": { + "External id": 988368,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940480384.583, "dur": 8.712, + "args": { + "External id": 988369,"Record function id": 0, "Ev Idx": 6352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940480386.402, "dur": 6.317, + "args": { + "External id": 988370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940480387.729, "dur": 4.554, + "args": { + "External id": 988371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940480390.880, "dur": 1.283, + "args": { + "External id": 988372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480397.412, "dur": 7.989, + "args": { + "External id": 988373,"Record function id": 0, "Sequence number": 10552633, "Fwd thread id": 1, "Ev Idx": 6356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480398.768, "dur": 3.940, + "args": { + "External id": 988374,"Sequence number": 10552633, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6357 + } + }, + { + "ph": "f", "id": 286, "pid": 2338708, "tid": 2379421, "ts": 6345940480398.768, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940480400.218, "dur": 2.320, + "args": { + "External id": 988375,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940480401.324, "dur": 1.089, + "args": { + "External id": 988376,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480409.682, "dur": 114.476, + "args": { + "External id": 988377,"Record function id": 0, "Sequence number": 10552632, "Fwd thread id": 1, "Ev Idx": 6360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480410.736, "dur": 104.701, + "args": { + "External id": 988378,"Sequence number": 10552632, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6361 + } + }, + { + "ph": "f", "id": 287, "pid": 2338708, "tid": 2379421, "ts": 6345940480410.736, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940480413.370, "dur": 5.472, + "args": { + "External id": 988379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940480416.355, "dur": 1.936, + "args": { + "External id": 988380,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940480417.551, "dur": 0.605, + "args": { + "External id": 988381,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940480419.588, "dur": 40.374, + "args": { + "External id": 988382,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940480461.496, "dur": 4.453, + "args": { + "External id": 988383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940480462.229, "dur": 3.004, + "args": { + "External id": 988384,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940480464.123, "dur": 0.973, + "args": { + "External id": 988385,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940480467.368, "dur": 6.277, + "args": { + "External id": 988386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940480470.604, "dur": 2.570, + "args": { + "External id": 988387,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940480472.457, "dur": 0.642, + "args": { + "External id": 988388,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940480474.175, "dur": 40.254, + "args": { + "External id": 988389,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480529.932, "dur": 30.897, + "args": { + "External id": 988390,"Record function id": 0, "Sequence number": 10552631, "Fwd thread id": 1, "Ev Idx": 6373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480531.253, "dur": 4.723, + "args": { + "External id": 988391,"Sequence number": 10552631, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6374 + } + }, + { + "ph": "f", "id": 288, "pid": 2338708, "tid": 2379421, "ts": 6345940480531.253, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940480533.011, "dur": 2.798, + "args": { + "External id": 988392,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940480534.129, "dur": 1.499, + "args": { + "External id": 988393,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940480539.210, "dur": 19.332, + "args": { + "External id": 988394,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480565.326, "dur": 9.894, + "args": { + "External id": 988395,"Record function id": 0, "Sequence number": 10552630, "Fwd thread id": 1, "Ev Idx": 6378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940480568.189, "dur": 4.590, + "args": { + "External id": 988396,"Sequence number": 10552630, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6379 + } + }, + { + "ph": "f", "id": 289, "pid": 2338708, "tid": 2379421, "ts": 6345940480568.189, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940480568.943, "dur": 3.573, + "args": { + "External id": 988397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940480569.889, "dur": 1.977, + "args": { + "External id": 988398,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940480571.264, "dur": 0.433, + "args": { + "External id": 988399,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940480580.421, "dur": 5.840, + "args": { + "External id": 988400,"Record function id": 0, "Ev Idx": 6383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940480581.918, "dur": 3.711, + "args": { + "External id": 988401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940480582.989, "dur": 1.789, + "args": { + "External id": 988402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940480583.585, "dur": 1.083, + "args": { + "External id": 988403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940480591.687, "dur": 411.930, + "args": { + "External id": 988404,"Record function id": 0, "Sequence number": 10552629, "Fwd thread id": 1, "Ev Idx": 6387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940480593.427, "dur": 373.813, + "args": { + "External id": 988405,"Sequence number": 10552629, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6388 + } + }, + { + "ph": "f", "id": 290, "pid": 2338708, "tid": 2379421, "ts": 6345940480593.427, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940480632.830, "dur": 5.006, + "args": { + "External id": 988406,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940480636.485, "dur": 1.174, + "args": { + "External id": 988407,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940480654.930, "dur": 4.660, + "args": { + "External id": 988408,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940480670.883, "dur": 2.416, + "args": { + "External id": 988409,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940480851.166, "dur": 2.133, + "args": { + "External id": 988410,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940480857.831, "dur": 44.513, + "args": { + "External id": 988411,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940480872.604, "dur": 3.636, + "args": { + "External id": 988412,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940480908.967, "dur": 33.721, + "args": { + "External id": 988413,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940480913.265, "dur": 29.112, + "args": { + "External id": 988414,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940480917.915, "dur": 4.515, + "args": { + "External id": 988415,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940480924.050, "dur": 17.615, + "args": { + "External id": 988416,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345940480947.344, "dur": 2.851, + "args": { + "External id": 988417,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940480948.764, "dur": 1.235, + "args": { + "External id": 988418,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940480956.919, "dur": 2.635, + "args": { + "External id": 988419,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940480958.357, "dur": 1.087, + "args": { + "External id": 988420,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345940480977.935, "dur": 17.604, + "args": { + "External id": 988421,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940481039.201, "dur": 53.543, + "args": { + "External id": 988422,"Record function id": 0, "Ev Idx": 6405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940481042.157, "dur": 48.313, + "args": { + "External id": 988423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940481044.755, "dur": 6.214, + "args": { + "External id": 988424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940481048.423, "dur": 2.268, + "args": { + "External id": 988425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481100.801, "dur": 7.633, + "args": { + "External id": 988426,"Record function id": 0, "Sequence number": 10552628, "Fwd thread id": 1, "Ev Idx": 6409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481102.809, "dur": 1.688, + "args": { + "External id": 988427,"Sequence number": 10552628, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6410 + } + }, + { + "ph": "f", "id": 291, "pid": 2338708, "tid": 2379421, "ts": 6345940481102.809, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940481113.129, "dur": 475.269, + "args": { + "External id": 988428,"Record function id": 0, "Sequence number": 10552627, "Fwd thread id": 1, "Ev Idx": 6411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940481114.441, "dur": 463.092, + "args": { + "External id": 988429,"Sequence number": 10552627, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6412 + } + }, + { + "ph": "f", "id": 292, "pid": 2338708, "tid": 2379421, "ts": 6345940481114.441, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940481152.244, "dur": 10.856, + "args": { + "External id": 988430,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940481158.313, "dur": 4.428, + "args": { + "External id": 988431,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940481167.198, "dur": 8.215, + "args": { + "External id": 988432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940481171.539, "dur": 3.159, + "args": { + "External id": 988433,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481173.422, "dur": 1.113, + "args": { + "External id": 988434,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2379421, + "ts": 6345940481179.491, "dur": 108.477, + "args": { + "External id": 988435,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940481180.711, "dur": 3.642, + "args": { + "External id": 988436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940481181.637, "dur": 2.116, + "args": { + "External id": 988437,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481183.212, "dur": 0.446, + "args": { + "External id": 988438,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2379421, + "ts": 6345940481188.198, "dur": 98.963, + "args": { + "External id": 988439,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940481189.911, "dur": 96.183, + "args": { + "External id": 988440,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345940481293.008, "dur": 3.890, + "args": { + "External id": 988441,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940481294.962, "dur": 1.771, + "args": { + "External id": 988442,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940481334.293, "dur": 7.294, + "args": { + "External id": 988443,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940481343.375, "dur": 3.148, + "args": { + "External id": 988444,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940481347.911, "dur": 2.334, + "args": { + "External id": 988445,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940481392.821, "dur": 2.689, + "args": { + "External id": 988446,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940481394.175, "dur": 1.175, + "args": { + "External id": 988447,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338708, "tid": 2379421, + "ts": 6345940481420.459, "dur": 136.579, + "args": { + "External id": 988448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345940481426.871, "dur": 5.980, + "args": { + "External id": 988449,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481430.739, "dur": 1.126, + "args": { + "External id": 988450,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940481434.605, "dur": 7.783, + "args": { + "External id": 988451,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481440.826, "dur": 0.785, + "args": { + "External id": 988452,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345940481443.994, "dur": 3.130, + "args": { + "External id": 988453,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481446.207, "dur": 0.535, + "args": { + "External id": 988454,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940481448.137, "dur": 3.252, + "args": { + "External id": 988455,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481450.267, "dur": 0.638, + "args": { + "External id": 988456,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940481458.394, "dur": 5.832, + "args": { + "External id": 988457,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481460.957, "dur": 2.849, + "args": { + "External id": 988458,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940481465.442, "dur": 6.523, + "args": { + "External id": 988459,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940481469.588, "dur": 2.111, + "args": { + "External id": 988460,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940481473.474, "dur": 2.888, + "args": { + "External id": 988461,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481475.505, "dur": 0.506, + "args": { + "External id": 988462,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940481477.284, "dur": 2.955, + "args": { + "External id": 988463,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940481478.443, "dur": 1.679, + "args": { + "External id": 988464,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345940481481.349, "dur": 59.921, + "args": { + "External id": 988465,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940481546.331, "dur": 1.301, + "args": { + "External id": 988466,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940481548.686, "dur": 3.561, + "args": { + "External id": 988467,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481551.015, "dur": 0.534, + "args": { + "External id": 988468,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940481554.700, "dur": 1.050, + "args": { + "External id": 988469,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940481599.155, "dur": 10.611, + "args": { + "External id": 988470,"Record function id": 0, "Ev Idx": 6453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940481601.349, "dur": 7.705, + "args": { + "External id": 988471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940481604.041, "dur": 3.803, + "args": { + "External id": 988472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940481605.379, "dur": 2.324, + "args": { + "External id": 988473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481614.176, "dur": 12.082, + "args": { + "External id": 988474,"Record function id": 0, "Sequence number": 10552626, "Fwd thread id": 1, "Ev Idx": 6457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481615.602, "dur": 7.744, + "args": { + "External id": 988475,"Sequence number": 10552626, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6458 + } + }, + { + "ph": "f", "id": 293, "pid": 2338708, "tid": 2379421, "ts": 6345940481615.602, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940481617.726, "dur": 5.355, + "args": { + "External id": 988476,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940481621.663, "dur": 1.234, + "args": { + "External id": 988477,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481630.635, "dur": 126.913, + "args": { + "External id": 988478,"Record function id": 0, "Sequence number": 10552625, "Fwd thread id": 1, "Ev Idx": 6461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481631.678, "dur": 118.120, + "args": { + "External id": 988479,"Sequence number": 10552625, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6462 + } + }, + { + "ph": "f", "id": 294, "pid": 2338708, "tid": 2379421, "ts": 6345940481631.678, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940481635.167, "dur": 4.557, + "args": { + "External id": 988480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940481636.598, "dur": 2.455, + "args": { + "External id": 988481,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481638.315, "dur": 0.563, + "args": { + "External id": 988482,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940481643.430, "dur": 48.769, + "args": { + "External id": 988483,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940481693.986, "dur": 4.884, + "args": { + "External id": 988484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940481695.085, "dur": 3.026, + "args": { + "External id": 988485,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481697.065, "dur": 0.812, + "args": { + "External id": 988486,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940481700.589, "dur": 8.857, + "args": { + "External id": 988487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940481701.743, "dur": 7.138, + "args": { + "External id": 988488,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481705.797, "dur": 2.996, + "args": { + "External id": 988489,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940481710.092, "dur": 38.624, + "args": { + "External id": 988490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481763.416, "dur": 7.663, + "args": { + "External id": 988491,"Record function id": 0, "Sequence number": 10552624, "Fwd thread id": 1, "Ev Idx": 6474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481764.619, "dur": 4.956, + "args": { + "External id": 988492,"Sequence number": 10552624, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6475 + } + }, + { + "ph": "f", "id": 295, "pid": 2338708, "tid": 2379421, "ts": 6345940481764.619, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940481766.725, "dur": 2.655, + "args": { + "External id": 988493,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940481767.921, "dur": 1.299, + "args": { + "External id": 988494,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481774.869, "dur": 11.196, + "args": { + "External id": 988495,"Record function id": 0, "Sequence number": 10552623, "Fwd thread id": 1, "Ev Idx": 6478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481775.891, "dur": 7.859, + "args": { + "External id": 988496,"Sequence number": 10552623, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6479 + } + }, + { + "ph": "f", "id": 296, "pid": 2338708, "tid": 2379421, "ts": 6345940481775.891, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940481776.988, "dur": 6.467, + "args": { + "External id": 988497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940481780.646, "dur": 2.216, + "args": { + "External id": 988498,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481782.134, "dur": 0.526, + "args": { + "External id": 988499,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940481790.684, "dur": 5.759, + "args": { + "External id": 988500,"Record function id": 0, "Ev Idx": 6483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940481792.209, "dur": 3.661, + "args": { + "External id": 988501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940481793.376, "dur": 2.194, + "args": { + "External id": 988502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940481794.236, "dur": 1.232, + "args": { + "External id": 988503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481800.707, "dur": 7.389, + "args": { + "External id": 988504,"Record function id": 0, "Sequence number": 10552622, "Fwd thread id": 1, "Ev Idx": 6487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481801.834, "dur": 3.769, + "args": { + "External id": 988505,"Sequence number": 10552622, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6488 + } + }, + { + "ph": "f", "id": 297, "pid": 2338708, "tid": 2379421, "ts": 6345940481801.834, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940481803.059, "dur": 2.374, + "args": { + "External id": 988506,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940481804.300, "dur": 0.967, + "args": { + "External id": 988507,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481814.457, "dur": 110.056, + "args": { + "External id": 988508,"Record function id": 0, "Sequence number": 10552621, "Fwd thread id": 1, "Ev Idx": 6491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481815.705, "dur": 100.242, + "args": { + "External id": 988509,"Sequence number": 10552621, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6492 + } + }, + { + "ph": "f", "id": 298, "pid": 2338708, "tid": 2379421, "ts": 6345940481815.705, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940481817.750, "dur": 3.363, + "args": { + "External id": 988510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940481818.345, "dur": 2.239, + "args": { + "External id": 988511,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481819.937, "dur": 0.472, + "args": { + "External id": 988512,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940481821.720, "dur": 37.404, + "args": { + "External id": 988513,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940481860.720, "dur": 10.308, + "args": { + "External id": 988514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940481864.240, "dur": 6.145, + "args": { + "External id": 988515,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481866.114, "dur": 4.092, + "args": { + "External id": 988516,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940481872.151, "dur": 4.302, + "args": { + "External id": 988517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940481873.416, "dur": 2.496, + "args": { + "External id": 988518,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481875.345, "dur": 0.459, + "args": { + "External id": 988519,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940481877.246, "dur": 37.704, + "args": { + "External id": 988520,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481930.092, "dur": 37.733, + "args": { + "External id": 988521,"Record function id": 0, "Sequence number": 10552620, "Fwd thread id": 1, "Ev Idx": 6504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481933.927, "dur": 5.157, + "args": { + "External id": 988522,"Sequence number": 10552620, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6505 + } + }, + { + "ph": "f", "id": 299, "pid": 2338708, "tid": 2379421, "ts": 6345940481933.927, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940481936.006, "dur": 2.910, + "args": { + "External id": 988523,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940481937.225, "dur": 1.526, + "args": { + "External id": 988524,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345940481942.285, "dur": 22.397, + "args": { + "External id": 988525,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481972.700, "dur": 8.457, + "args": { + "External id": 988526,"Record function id": 0, "Sequence number": 10552619, "Fwd thread id": 1, "Ev Idx": 6509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940481973.600, "dur": 5.520, + "args": { + "External id": 988527,"Sequence number": 10552619, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6510 + } + }, + { + "ph": "f", "id": 300, "pid": 2338708, "tid": 2379421, "ts": 6345940481973.600, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940481974.873, "dur": 4.002, + "args": { + "External id": 988528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940481975.675, "dur": 2.589, + "args": { + "External id": 988529,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940481977.556, "dur": 0.551, + "args": { + "External id": 988530,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940481985.821, "dur": 7.962, + "args": { + "External id": 988531,"Record function id": 0, "Ev Idx": 6514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940481987.557, "dur": 5.623, + "args": { + "External id": 988532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940481988.564, "dur": 4.227, + "args": { + "External id": 988533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940481991.500, "dur": 1.177, + "args": { + "External id": 988534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940481998.663, "dur": 562.997, + "args": { + "External id": 988535,"Record function id": 0, "Sequence number": 10552618, "Fwd thread id": 1, "Ev Idx": 6518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940482000.391, "dur": 518.054, + "args": { + "External id": 988536,"Sequence number": 10552618, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6519 + } + }, + { + "ph": "f", "id": 301, "pid": 2338708, "tid": 2379421, "ts": 6345940482000.391, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2379421, + "ts": 6345940482046.371, "dur": 84.883, + "args": { + "External id": 988537,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345940482048.287, "dur": 82.694, + "args": { + "External id": 988538,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940482090.657, "dur": 10.784, + "args": { + "External id": 988539,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940482094.926, "dur": 5.599, + "args": { + "External id": 988540,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940482102.887, "dur": 27.547, + "args": { + "External id": 988541,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940482146.912, "dur": 6.812, + "args": { + "External id": 988542,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940482148.334, "dur": 5.250, + "args": { + "External id": 988543,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940482161.776, "dur": 2.219, + "args": { + "External id": 988544,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940482162.945, "dur": 0.889, + "args": { + "External id": 988545,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940482178.455, "dur": 3.743, + "args": { + "External id": 988546,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940482194.787, "dur": 2.803, + "args": { + "External id": 988547,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940482391.834, "dur": 4.729, + "args": { + "External id": 988548,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940482401.036, "dur": 39.789, + "args": { + "External id": 988549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940482413.305, "dur": 1.129, + "args": { + "External id": 988550,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940482447.344, "dur": 32.286, + "args": { + "External id": 988551,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940482449.418, "dur": 29.916, + "args": { + "External id": 988552,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940482454.637, "dur": 4.813, + "args": { + "External id": 988553,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940482461.102, "dur": 17.540, + "args": { + "External id": 988554,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345940482487.135, "dur": 3.205, + "args": { + "External id": 988555,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940482488.616, "dur": 1.549, + "args": { + "External id": 988556,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940482497.105, "dur": 5.677, + "args": { + "External id": 988557,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940482498.443, "dur": 4.207, + "args": { + "External id": 988558,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940482505.329, "dur": 2.418, + "args": { + "External id": 988559,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940482506.505, "dur": 1.120, + "args": { + "External id": 988560,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940482539.105, "dur": 20.772, + "args": { + "External id": 988561,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940482577.245, "dur": 10.927, + "args": { + "External id": 988562,"Record function id": 0, "Ev Idx": 6545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940482580.026, "dur": 7.398, + "args": { + "External id": 988563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940482582.685, "dur": 3.668, + "args": { + "External id": 988564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940482583.923, "dur": 2.288, + "args": { + "External id": 988565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940482592.631, "dur": 8.451, + "args": { + "External id": 988566,"Record function id": 0, "Sequence number": 10552617, "Fwd thread id": 1, "Ev Idx": 6549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940482593.952, "dur": 4.451, + "args": { + "External id": 988567,"Sequence number": 10552617, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6550 + } + }, + { + "ph": "f", "id": 302, "pid": 2338708, "tid": 2379421, "ts": 6345940482593.952, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940482595.670, "dur": 2.476, + "args": { + "External id": 988568,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940482596.771, "dur": 1.208, + "args": { + "External id": 988569,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940482605.624, "dur": 156.226, + "args": { + "External id": 988570,"Record function id": 0, "Sequence number": 10552616, "Fwd thread id": 1, "Ev Idx": 6553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940482608.904, "dur": 144.963, + "args": { + "External id": 988571,"Sequence number": 10552616, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6554 + } + }, + { + "ph": "f", "id": 303, "pid": 2338708, "tid": 2379421, "ts": 6345940482608.904, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940482612.794, "dur": 5.840, + "args": { + "External id": 988572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940482614.435, "dur": 3.545, + "args": { + "External id": 988573,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940482616.611, "dur": 1.131, + "args": { + "External id": 988574,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940482619.759, "dur": 75.321, + "args": { + "External id": 988575,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940482696.784, "dur": 8.241, + "args": { + "External id": 988576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940482697.603, "dur": 6.737, + "args": { + "External id": 988577,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940482701.449, "dur": 2.680, + "args": { + "External id": 988578,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940482706.652, "dur": 4.341, + "args": { + "External id": 988579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940482707.671, "dur": 2.545, + "args": { + "External id": 988580,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940482709.529, "dur": 0.608, + "args": { + "External id": 988581,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940482711.663, "dur": 41.285, + "args": { + "External id": 988582,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940482768.005, "dur": 7.761, + "args": { + "External id": 988583,"Record function id": 0, "Sequence number": 10552615, "Fwd thread id": 1, "Ev Idx": 6566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940482768.927, "dur": 4.783, + "args": { + "External id": 988584,"Sequence number": 10552615, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6567 + } + }, + { + "ph": "f", "id": 304, "pid": 2338708, "tid": 2379421, "ts": 6345940482768.927, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940482770.716, "dur": 2.824, + "args": { + "External id": 988585,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940482772.056, "dur": 1.365, + "args": { + "External id": 988586,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940482779.837, "dur": 11.355, + "args": { + "External id": 988587,"Record function id": 0, "Sequence number": 10552614, "Fwd thread id": 1, "Ev Idx": 6570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940482780.789, "dur": 7.960, + "args": { + "External id": 988588,"Sequence number": 10552614, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6571 + } + }, + { + "ph": "f", "id": 305, "pid": 2338708, "tid": 2379421, "ts": 6345940482780.789, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940482784.744, "dur": 3.753, + "args": { + "External id": 988589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940482785.856, "dur": 2.038, + "args": { + "External id": 988590,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940482787.263, "dur": 0.459, + "args": { + "External id": 988591,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940482795.687, "dur": 9.015, + "args": { + "External id": 988592,"Record function id": 0, "Ev Idx": 6575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940482797.305, "dur": 6.807, + "args": { + "External id": 988593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940482798.676, "dur": 5.140, + "args": { + "External id": 988594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940482799.508, "dur": 4.188, + "args": { + "External id": 988595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940482808.422, "dur": 9.602, + "args": { + "External id": 988596,"Record function id": 0, "Sequence number": 10552613, "Fwd thread id": 1, "Ev Idx": 6579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940482809.390, "dur": 6.372, + "args": { + "External id": 988597,"Sequence number": 10552613, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6580 + } + }, + { + "ph": "f", "id": 306, "pid": 2338708, "tid": 2379421, "ts": 6345940482809.390, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940482810.558, "dur": 5.042, + "args": { + "External id": 988598,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940482814.117, "dur": 1.306, + "args": { + "External id": 988599,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940482823.553, "dur": 462.411, + "args": { + "External id": 988600,"Record function id": 0, "Sequence number": 10552612, "Fwd thread id": 1, "Ev Idx": 6583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940482824.751, "dur": 440.794, + "args": { + "External id": 988601,"Sequence number": 10552612, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6584 + } + }, + { + "ph": "f", "id": 307, "pid": 2338708, "tid": 2379421, "ts": 6345940482824.751, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940482841.630, "dur": 7.719, + "args": { + "External id": 988602,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940482845.068, "dur": 3.715, + "args": { + "External id": 988603,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940482851.567, "dur": 4.404, + "args": { + "External id": 988604,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940482854.058, "dur": 1.689, + "args": { + "External id": 988605,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940482857.711, "dur": 4.840, + "args": { + "External id": 988606,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940482860.387, "dur": 1.948, + "args": { + "External id": 988607,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940482889.982, "dur": 345.119, + "args": { + "External id": 988608,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940482980.340, "dur": 3.497, + "args": { + "External id": 988609,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940482986.280, "dur": 3.337, + "args": { + "External id": 988610,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940482991.023, "dur": 2.846, + "args": { + "External id": 988611,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940482994.886, "dur": 4.665, + "args": { + "External id": 988612,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940483117.479, "dur": 4.045, + "args": { + "External id": 988613,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940483118.915, "dur": 2.261, + "args": { + "External id": 988614,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940483123.609, "dur": 34.549, + "args": { + "External id": 988615,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940483129.946, "dur": 2.891, + "args": { + "External id": 988616,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940483159.777, "dur": 2.214, + "args": { + "External id": 988617,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940483161.074, "dur": 0.823, + "args": { + "External id": 988618,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940483162.713, "dur": 18.387, + "args": { + "External id": 988619,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940483165.849, "dur": 0.648, + "args": { + "External id": 988620,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940483250.835, "dur": 4.544, + "args": { + "External id": 988621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940483258.809, "dur": 0.849, + "args": { + "External id": 988622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940483261.934, "dur": 0.593, + "args": { + "External id": 988623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940483299.151, "dur": 258.567, + "args": { + "External id": 988624,"Record function id": 0, "Sequence number": 10552611, "Fwd thread id": 1, "Ev Idx": 6607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940483300.995, "dur": 249.196, + "args": { + "External id": 988625,"Sequence number": 10552611, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6608 + } + }, + { + "ph": "f", "id": 308, "pid": 2338708, "tid": 2379421, "ts": 6345940483300.995, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345940483326.764, "dur": 50.276, + "args": { + "External id": 988626,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940483331.036, "dur": 5.230, + "args": { + "External id": 988627,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940483338.018, "dur": 38.442, + "args": { + "External id": 988628,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940483388.392, "dur": 6.190, + "args": { + "External id": 988629,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940483391.287, "dur": 2.876, + "args": { + "External id": 988630,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940483565.872, "dur": 193.271, + "args": { + "External id": 988631,"Record function id": 0, "Sequence number": 10552610, "Fwd thread id": 1, "Ev Idx": 6614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940483567.998, "dur": 183.557, + "args": { + "External id": 988632,"Sequence number": 10552610, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6615 + } + }, + { + "ph": "f", "id": 309, "pid": 2338708, "tid": 2379421, "ts": 6345940483567.998, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345940483581.553, "dur": 53.526, + "args": { + "External id": 988633,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940483584.711, "dur": 3.637, + "args": { + "External id": 988634,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940483589.426, "dur": 44.986, + "args": { + "External id": 988635,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940483643.427, "dur": 5.480, + "args": { + "External id": 988636,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940483645.905, "dur": 2.651, + "args": { + "External id": 988637,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940483766.338, "dur": 16.230, + "args": { + "External id": 988638,"Record function id": 0, "Sequence number": 10552609, "Fwd thread id": 1, "Ev Idx": 6621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940483770.332, "dur": 9.621, + "args": { + "External id": 988639,"Sequence number": 10552609, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6622 + } + }, + { + "ph": "f", "id": 310, "pid": 2338708, "tid": 2379421, "ts": 6345940483770.332, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940483773.639, "dur": 5.951, + "args": { + "External id": 988640,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940483775.270, "dur": 4.096, + "args": { + "External id": 988641,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940483786.986, "dur": 7.447, + "args": { + "External id": 988642,"Record function id": 0, "Sequence number": 10552608, "Fwd thread id": 1, "Ev Idx": 6625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940483788.082, "dur": 4.121, + "args": { + "External id": 988643,"Sequence number": 10552608, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6626 + } + }, + { + "ph": "f", "id": 311, "pid": 2338708, "tid": 2379421, "ts": 6345940483788.082, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940483789.632, "dur": 2.410, + "args": { + "External id": 988644,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940483790.683, "dur": 1.181, + "args": { + "External id": 988645,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940483798.616, "dur": 8.931, + "args": { + "External id": 988646,"Record function id": 0, "Sequence number": 10552607, "Fwd thread id": 1, "Ev Idx": 6629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940483799.560, "dur": 6.261, + "args": { + "External id": 988647,"Sequence number": 10552607, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6630 + } + }, + { + "ph": "f", "id": 312, "pid": 2338708, "tid": 2379421, "ts": 6345940483799.560, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940483801.044, "dur": 4.616, + "args": { + "External id": 988648,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940483804.513, "dur": 1.028, + "args": { + "External id": 988649,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940483811.437, "dur": 9.104, + "args": { + "External id": 988650,"Record function id": 0, "Sequence number": 10552606, "Fwd thread id": 1, "Ev Idx": 6633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940483812.932, "dur": 5.710, + "args": { + "External id": 988651,"Sequence number": 10552606, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6634 + } + }, + { + "ph": "f", "id": 313, "pid": 2338708, "tid": 2379421, "ts": 6345940483812.932, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940483814.303, "dur": 4.173, + "args": { + "External id": 988652,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940483814.952, "dur": 3.367, + "args": { + "External id": 988653,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940483824.691, "dur": 171.802, + "args": { + "External id": 988654,"Record function id": 0, "Sequence number": 10552605, "Fwd thread id": 1, "Ev Idx": 6637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940483825.856, "dur": 162.732, + "args": { + "External id": 988655,"Sequence number": 10552605, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6638 + } + }, + { + "ph": "f", "id": 314, "pid": 2338708, "tid": 2379421, "ts": 6345940483825.856, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940483829.819, "dur": 9.820, + "args": { + "External id": 988656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940483832.003, "dur": 6.779, + "args": { + "External id": 988657,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940483836.807, "dur": 1.653, + "args": { + "External id": 988658,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940483841.346, "dur": 74.139, + "args": { + "External id": 988659,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940483917.103, "dur": 10.415, + "args": { + "External id": 988660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940483918.011, "dur": 8.739, + "args": { + "External id": 988661,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940483924.505, "dur": 2.001, + "args": { + "External id": 988662,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940483929.894, "dur": 6.083, + "args": { + "External id": 988663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940483933.760, "dur": 1.562, + "args": { + "External id": 988664,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940483934.846, "dur": 0.353, + "args": { + "External id": 988665,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940483936.750, "dur": 50.647, + "args": { + "External id": 988666,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484002.456, "dur": 29.792, + "args": { + "External id": 988667,"Record function id": 0, "Sequence number": 10552604, "Fwd thread id": 1, "Ev Idx": 6650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484003.411, "dur": 25.669, + "args": { + "External id": 988668,"Sequence number": 10552604, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6651 + } + }, + { + "ph": "f", "id": 315, "pid": 2338708, "tid": 2379421, "ts": 6345940484003.411, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940484005.280, "dur": 23.594, + "args": { + "External id": 988669,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940484006.560, "dur": 21.749, + "args": { + "External id": 988670,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484039.103, "dur": 95.231, + "args": { + "External id": 988671,"Record function id": 0, "Sequence number": 10552603, "Fwd thread id": 1, "Ev Idx": 6654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484043.140, "dur": 87.260, + "args": { + "External id": 988672,"Sequence number": 10552603, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6655 + } + }, + { + "ph": "f", "id": 316, "pid": 2338708, "tid": 2379421, "ts": 6345940484043.140, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940484122.711, "dur": 7.408, + "args": { + "External id": 988673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940484125.000, "dur": 3.984, + "args": { + "External id": 988674,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940484127.727, "dur": 0.847, + "args": { + "External id": 988675,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940484143.180, "dur": 13.331, + "args": { + "External id": 988676,"Record function id": 0, "Ev Idx": 6659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940484145.195, "dur": 10.441, + "args": { + "External id": 988677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940484148.204, "dur": 6.942, + "args": { + "External id": 988678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940484149.555, "dur": 5.477, + "args": { + "External id": 988679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484160.743, "dur": 9.776, + "args": { + "External id": 988680,"Record function id": 0, "Sequence number": 10552602, "Fwd thread id": 1, "Ev Idx": 6663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484162.219, "dur": 5.807, + "args": { + "External id": 988681,"Sequence number": 10552602, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6664 + } + }, + { + "ph": "f", "id": 317, "pid": 2338708, "tid": 2379421, "ts": 6345940484162.219, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940484163.505, "dur": 4.355, + "args": { + "External id": 988682,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940484166.428, "dur": 1.324, + "args": { + "External id": 988683,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484174.500, "dur": 119.161, + "args": { + "External id": 988684,"Record function id": 0, "Sequence number": 10552601, "Fwd thread id": 1, "Ev Idx": 6667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484175.322, "dur": 109.069, + "args": { + "External id": 988685,"Sequence number": 10552601, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6668 + } + }, + { + "ph": "f", "id": 318, "pid": 2338708, "tid": 2379421, "ts": 6345940484175.322, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940484178.042, "dur": 2.892, + "args": { + "External id": 988686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940484178.717, "dur": 1.650, + "args": { + "External id": 988687,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940484179.800, "dur": 0.440, + "args": { + "External id": 988688,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940484184.181, "dur": 43.295, + "args": { + "External id": 988689,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940484229.049, "dur": 3.497, + "args": { + "External id": 988690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940484229.727, "dur": 2.227, + "args": { + "External id": 988691,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940484231.101, "dur": 0.695, + "args": { + "External id": 988692,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940484234.024, "dur": 5.660, + "args": { + "External id": 988693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940484235.025, "dur": 4.143, + "args": { + "External id": 988694,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940484238.559, "dur": 0.529, + "args": { + "External id": 988695,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940484240.236, "dur": 43.112, + "args": { + "External id": 988696,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484299.321, "dur": 40.376, + "args": { + "External id": 988697,"Record function id": 0, "Sequence number": 10552600, "Fwd thread id": 1, "Ev Idx": 6680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484300.332, "dur": 4.702, + "args": { + "External id": 988698,"Sequence number": 10552600, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6681 + } + }, + { + "ph": "f", "id": 319, "pid": 2338708, "tid": 2379421, "ts": 6345940484300.332, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940484302.749, "dur": 2.115, + "args": { + "External id": 988699,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940484303.536, "dur": 1.153, + "args": { + "External id": 988700,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345940484308.776, "dur": 27.958, + "args": { + "External id": 988701,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484344.144, "dur": 11.014, + "args": { + "External id": 988702,"Record function id": 0, "Sequence number": 10552599, "Fwd thread id": 1, "Ev Idx": 6685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484345.041, "dur": 7.684, + "args": { + "External id": 988703,"Sequence number": 10552599, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6686 + } + }, + { + "ph": "f", "id": 320, "pid": 2338708, "tid": 2379421, "ts": 6345940484345.041, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940484346.070, "dur": 6.422, + "args": { + "External id": 988704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940484346.790, "dur": 5.062, + "args": { + "External id": 988705,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940484351.074, "dur": 0.602, + "args": { + "External id": 988706,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940484360.147, "dur": 5.649, + "args": { + "External id": 988707,"Record function id": 0, "Ev Idx": 6690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940484361.627, "dur": 3.548, + "args": { + "External id": 988708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940484362.824, "dur": 1.946, + "args": { + "External id": 988709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940484363.381, "dur": 1.240, + "args": { + "External id": 988710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484369.903, "dur": 7.619, + "args": { + "External id": 988711,"Record function id": 0, "Sequence number": 10552598, "Fwd thread id": 1, "Ev Idx": 6694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484371.485, "dur": 3.241, + "args": { + "External id": 988712,"Sequence number": 10552598, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6695 + } + }, + { + "ph": "f", "id": 321, "pid": 2338708, "tid": 2379421, "ts": 6345940484371.485, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940484372.399, "dur": 2.156, + "args": { + "External id": 988713,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940484373.472, "dur": 0.974, + "args": { + "External id": 988714,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484381.702, "dur": 111.854, + "args": { + "External id": 988715,"Record function id": 0, "Sequence number": 10552597, "Fwd thread id": 1, "Ev Idx": 6698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484385.081, "dur": 99.880, + "args": { + "External id": 988716,"Sequence number": 10552597, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6699 + } + }, + { + "ph": "f", "id": 322, "pid": 2338708, "tid": 2379421, "ts": 6345940484385.081, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940484387.488, "dur": 2.938, + "args": { + "External id": 988717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940484388.063, "dur": 1.818, + "args": { + "External id": 988718,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940484389.246, "dur": 0.450, + "args": { + "External id": 988719,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940484391.069, "dur": 40.997, + "args": { + "External id": 988720,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940484433.765, "dur": 5.743, + "args": { + "External id": 988721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940484434.294, "dur": 4.487, + "args": { + "External id": 988722,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940484437.894, "dur": 0.763, + "args": { + "External id": 988723,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940484440.706, "dur": 3.437, + "args": { + "External id": 988724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940484441.746, "dur": 1.872, + "args": { + "External id": 988725,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940484443.055, "dur": 0.430, + "args": { + "External id": 988726,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940484444.615, "dur": 39.558, + "args": { + "External id": 988727,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484499.486, "dur": 31.066, + "args": { + "External id": 988728,"Record function id": 0, "Sequence number": 10552596, "Fwd thread id": 1, "Ev Idx": 6711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484500.527, "dur": 6.637, + "args": { + "External id": 988729,"Sequence number": 10552596, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6712 + } + }, + { + "ph": "f", "id": 323, "pid": 2338708, "tid": 2379421, "ts": 6345940484500.527, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940484504.620, "dur": 2.361, + "args": { + "External id": 988730,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940484505.506, "dur": 1.301, + "args": { + "External id": 988731,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940484510.207, "dur": 18.012, + "args": { + "External id": 988732,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484534.821, "dur": 8.400, + "args": { + "External id": 988733,"Record function id": 0, "Sequence number": 10552595, "Fwd thread id": 1, "Ev Idx": 6716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484535.736, "dur": 5.099, + "args": { + "External id": 988734,"Sequence number": 10552595, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6717 + } + }, + { + "ph": "f", "id": 324, "pid": 2338708, "tid": 2379421, "ts": 6345940484535.736, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940484536.446, "dur": 4.115, + "args": { + "External id": 988735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940484537.406, "dur": 2.571, + "args": { + "External id": 988736,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940484539.319, "dur": 0.509, + "args": { + "External id": 988737,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940484547.816, "dur": 8.540, + "args": { + "External id": 988738,"Record function id": 0, "Ev Idx": 6721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940484549.859, "dur": 5.934, + "args": { + "External id": 988739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940484550.794, "dur": 4.302, + "args": { + "External id": 988740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940484553.846, "dur": 1.110, + "args": { + "External id": 988741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940484561.579, "dur": 405.445, + "args": { + "External id": 988742,"Record function id": 0, "Sequence number": 10552594, "Fwd thread id": 1, "Ev Idx": 6725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940484563.085, "dur": 372.423, + "args": { + "External id": 988743,"Sequence number": 10552594, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6726 + } + }, + { + "ph": "f", "id": 325, "pid": 2338708, "tid": 2379421, "ts": 6345940484563.085, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940484603.471, "dur": 2.371, + "args": { + "External id": 988744,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940484604.347, "dur": 1.278, + "args": { + "External id": 988745,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940484622.758, "dur": 4.361, + "args": { + "External id": 988746,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940484638.340, "dur": 2.521, + "args": { + "External id": 988747,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940484814.866, "dur": 2.672, + "args": { + "External id": 988748,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940484821.732, "dur": 42.950, + "args": { + "External id": 988749,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940484837.096, "dur": 0.996, + "args": { + "External id": 988750,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940484871.316, "dur": 35.944, + "args": { + "External id": 988751,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940484873.276, "dur": 33.690, + "args": { + "External id": 988752,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940484878.163, "dur": 5.538, + "args": { + "External id": 988753,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940484885.340, "dur": 20.956, + "args": { + "External id": 988754,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345940484912.557, "dur": 3.039, + "args": { + "External id": 988755,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940484914.332, "dur": 1.094, + "args": { + "External id": 988756,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940484924.669, "dur": 2.539, + "args": { + "External id": 988757,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940484925.736, "dur": 1.365, + "args": { + "External id": 988758,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940484945.524, "dur": 17.171, + "args": { + "External id": 988759,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940484978.762, "dur": 9.277, + "args": { + "External id": 988760,"Record function id": 0, "Ev Idx": 6743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940484981.333, "dur": 5.890, + "args": { + "External id": 988761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940484983.286, "dur": 2.656, + "args": { + "External id": 988762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940484984.243, "dur": 1.581, + "args": { + "External id": 988763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484992.943, "dur": 9.321, + "args": { + "External id": 988764,"Record function id": 0, "Sequence number": 10552593, "Fwd thread id": 1, "Ev Idx": 6747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940484997.002, "dur": 1.381, + "args": { + "External id": 988765,"Sequence number": 10552593, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6748 + } + }, + { + "ph": "f", "id": 326, "pid": 2338708, "tid": 2379421, "ts": 6345940484997.002, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940485027.972, "dur": 541.433, + "args": { + "External id": 988766,"Record function id": 0, "Sequence number": 10552592, "Fwd thread id": 1, "Ev Idx": 6749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940485034.128, "dur": 522.394, + "args": { + "External id": 988767,"Sequence number": 10552592, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6750 + } + }, + { + "ph": "f", "id": 327, "pid": 2338708, "tid": 2379421, "ts": 6345940485034.128, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940485119.637, "dur": 11.935, + "args": { + "External id": 988768,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940485125.857, "dur": 5.140, + "args": { + "External id": 988769,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 6752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940485135.609, "dur": 8.663, + "args": { + "External id": 988770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940485137.452, "dur": 6.021, + "args": { + "External id": 988771,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485142.112, "dur": 1.179, + "args": { + "External id": 988772,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2379421, + "ts": 6345940485148.673, "dur": 114.030, + "args": { + "External id": 988773,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 6756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940485149.809, "dur": 3.765, + "args": { + "External id": 988774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 6757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940485150.752, "dur": 2.108, + "args": { + "External id": 988775,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485152.166, "dur": 0.550, + "args": { + "External id": 988776,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2379421, + "ts": 6345940485155.196, "dur": 106.521, + "args": { + "External id": 988777,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940485158.955, "dur": 101.781, + "args": { + "External id": 988778,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 6761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345940485267.607, "dur": 3.692, + "args": { + "External id": 988779,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 6762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940485269.269, "dur": 1.850, + "args": { + "External id": 988780,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940485308.642, "dur": 5.007, + "args": { + "External id": 988781,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940485315.316, "dur": 5.065, + "args": { + "External id": 988782,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940485321.693, "dur": 2.384, + "args": { + "External id": 988783,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940485366.681, "dur": 2.700, + "args": { + "External id": 988784,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940485367.977, "dur": 1.209, + "args": { + "External id": 988785,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338708, "tid": 2379421, + "ts": 6345940485394.050, "dur": 142.354, + "args": { + "External id": 988786,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 6769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345940485400.237, "dur": 6.958, + "args": { + "External id": 988787,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485404.915, "dur": 1.198, + "args": { + "External id": 988788,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940485408.934, "dur": 7.838, + "args": { + "External id": 988789,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 6772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485414.786, "dur": 0.762, + "args": { + "External id": 988790,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 6773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345940485418.612, "dur": 3.523, + "args": { + "External id": 988791,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 6774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485421.040, "dur": 0.646, + "args": { + "External id": 988792,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940485422.941, "dur": 3.465, + "args": { + "External id": 988793,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485425.313, "dur": 0.513, + "args": { + "External id": 988794,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 6777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940485433.004, "dur": 3.671, + "args": { + "External id": 988795,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 6778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485435.767, "dur": 0.538, + "args": { + "External id": 988796,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 6779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940485437.916, "dur": 10.035, + "args": { + "External id": 988797,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 6780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940485442.592, "dur": 5.119, + "args": { + "External id": 988798,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 6781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940485449.705, "dur": 2.699, + "args": { + "External id": 988799,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 6782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485451.457, "dur": 0.623, + "args": { + "External id": 988800,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 6783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940485453.531, "dur": 2.813, + "args": { + "External id": 988801,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940485454.573, "dur": 1.665, + "args": { + "External id": 988802,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 6785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345940485457.617, "dur": 62.506, + "args": { + "External id": 988803,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 6786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940485525.097, "dur": 1.546, + "args": { + "External id": 988804,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 6787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940485527.737, "dur": 4.026, + "args": { + "External id": 988805,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 6788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485530.078, "dur": 0.749, + "args": { + "External id": 988806,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 6789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940485534.104, "dur": 1.082, + "args": { + "External id": 988807,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 6790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940485583.891, "dur": 10.077, + "args": { + "External id": 988808,"Record function id": 0, "Ev Idx": 6791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940485586.078, "dur": 7.091, + "args": { + "External id": 988809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940485588.250, "dur": 3.911, + "args": { + "External id": 988810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940485589.307, "dur": 2.727, + "args": { + "External id": 988811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 6794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485599.018, "dur": 10.442, + "args": { + "External id": 988812,"Record function id": 0, "Sequence number": 10552591, "Fwd thread id": 1, "Ev Idx": 6795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485600.236, "dur": 6.611, + "args": { + "External id": 988813,"Sequence number": 10552591, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6796 + } + }, + { + "ph": "f", "id": 328, "pid": 2338708, "tid": 2379421, "ts": 6345940485600.236, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940485602.420, "dur": 4.110, + "args": { + "External id": 988814,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940485605.315, "dur": 1.050, + "args": { + "External id": 988815,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485614.052, "dur": 127.960, + "args": { + "External id": 988816,"Record function id": 0, "Sequence number": 10552590, "Fwd thread id": 1, "Ev Idx": 6799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485615.131, "dur": 119.323, + "args": { + "External id": 988817,"Sequence number": 10552590, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6800 + } + }, + { + "ph": "f", "id": 329, "pid": 2338708, "tid": 2379421, "ts": 6345940485615.131, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940485618.697, "dur": 6.729, + "args": { + "External id": 988818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940485619.908, "dur": 4.829, + "args": { + "External id": 988819,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485621.511, "dur": 3.009, + "args": { + "External id": 988820,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940485628.968, "dur": 49.642, + "args": { + "External id": 988821,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940485680.306, "dur": 4.830, + "args": { + "External id": 988822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940485681.286, "dur": 3.146, + "args": { + "External id": 988823,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485682.990, "dur": 1.253, + "args": { + "External id": 988824,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940485686.850, "dur": 7.077, + "args": { + "External id": 988825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940485688.258, "dur": 5.133, + "args": { + "External id": 988826,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485692.572, "dur": 0.744, + "args": { + "External id": 988827,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940485694.475, "dur": 39.137, + "args": { + "External id": 988828,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485747.950, "dur": 8.328, + "args": { + "External id": 988829,"Record function id": 0, "Sequence number": 10552589, "Fwd thread id": 1, "Ev Idx": 6812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485749.254, "dur": 5.159, + "args": { + "External id": 988830,"Sequence number": 10552589, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6813 + } + }, + { + "ph": "f", "id": 330, "pid": 2338708, "tid": 2379421, "ts": 6345940485749.254, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940485751.478, "dur": 2.741, + "args": { + "External id": 988831,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940485752.889, "dur": 1.206, + "args": { + "External id": 988832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485760.811, "dur": 10.188, + "args": { + "External id": 988833,"Record function id": 0, "Sequence number": 10552588, "Fwd thread id": 1, "Ev Idx": 6816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485761.730, "dur": 7.007, + "args": { + "External id": 988834,"Sequence number": 10552588, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6817 + } + }, + { + "ph": "f", "id": 331, "pid": 2338708, "tid": 2379421, "ts": 6345940485761.730, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940485762.475, "dur": 5.983, + "args": { + "External id": 988835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940485763.457, "dur": 4.415, + "args": { + "External id": 988836,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485767.065, "dur": 0.682, + "args": { + "External id": 988837,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940485776.075, "dur": 7.964, + "args": { + "External id": 988838,"Record function id": 0, "Ev Idx": 6821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940485777.448, "dur": 5.957, + "args": { + "External id": 988839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940485778.770, "dur": 4.281, + "args": { + "External id": 988840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940485779.458, "dur": 3.514, + "args": { + "External id": 988841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485788.165, "dur": 6.914, + "args": { + "External id": 988842,"Record function id": 0, "Sequence number": 10552587, "Fwd thread id": 1, "Ev Idx": 6825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485789.414, "dur": 3.586, + "args": { + "External id": 988843,"Sequence number": 10552587, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 6826 + } + }, + { + "ph": "f", "id": 332, "pid": 2338708, "tid": 2379421, "ts": 6345940485789.414, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940485790.442, "dur": 2.377, + "args": { + "External id": 988844,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940485791.639, "dur": 1.062, + "args": { + "External id": 988845,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 6828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485799.274, "dur": 108.377, + "args": { + "External id": 988846,"Record function id": 0, "Sequence number": 10552586, "Fwd thread id": 1, "Ev Idx": 6829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485800.102, "dur": 98.690, + "args": { + "External id": 988847,"Sequence number": 10552586, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6830 + } + }, + { + "ph": "f", "id": 333, "pid": 2338708, "tid": 2379421, "ts": 6345940485800.102, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940485804.991, "dur": 2.871, + "args": { + "External id": 988848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 6831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940485805.589, "dur": 1.754, + "args": { + "External id": 988849,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 6832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485806.751, "dur": 0.469, + "args": { + "External id": 988850,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 6833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940485808.686, "dur": 38.749, + "args": { + "External id": 988851,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 6834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940485849.017, "dur": 6.620, + "args": { + "External id": 988852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940485849.869, "dur": 5.084, + "args": { + "External id": 988853,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 6836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485853.617, "dur": 1.208, + "args": { + "External id": 988854,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 6837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940485857.087, "dur": 3.147, + "args": { + "External id": 988855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940485858.055, "dur": 1.719, + "args": { + "External id": 988856,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485859.312, "dur": 0.387, + "args": { + "External id": 988857,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940485860.796, "dur": 37.221, + "args": { + "External id": 988858,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 6841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485912.926, "dur": 39.215, + "args": { + "External id": 988859,"Record function id": 0, "Sequence number": 10552585, "Fwd thread id": 1, "Ev Idx": 6842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485914.030, "dur": 7.272, + "args": { + "External id": 988860,"Sequence number": 10552585, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6843 + } + }, + { + "ph": "f", "id": 334, "pid": 2338708, "tid": 2379421, "ts": 6345940485914.030, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940485916.368, "dur": 4.758, + "args": { + "External id": 988861,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940485919.792, "dur": 1.157, + "args": { + "External id": 988862,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345940485924.793, "dur": 23.828, + "args": { + "External id": 988863,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485957.115, "dur": 7.670, + "args": { + "External id": 988864,"Record function id": 0, "Sequence number": 10552584, "Fwd thread id": 1, "Ev Idx": 6847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940485958.180, "dur": 4.475, + "args": { + "External id": 988865,"Sequence number": 10552584, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6848 + } + }, + { + "ph": "f", "id": 335, "pid": 2338708, "tid": 2379421, "ts": 6345940485958.180, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940485958.935, "dur": 3.426, + "args": { + "External id": 988866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 6849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940485959.666, "dur": 2.105, + "args": { + "External id": 988867,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 6850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940485961.021, "dur": 0.567, + "args": { + "External id": 988868,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 6851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940485969.799, "dur": 8.821, + "args": { + "External id": 988869,"Record function id": 0, "Ev Idx": 6852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940485971.383, "dur": 6.624, + "args": { + "External id": 988870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940485972.341, "dur": 5.165, + "args": { + "External id": 988871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940485975.414, "dur": 1.973, + "args": { + "External id": 988872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 6855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940485983.756, "dur": 585.592, + "args": { + "External id": 988873,"Record function id": 0, "Sequence number": 10552583, "Fwd thread id": 1, "Ev Idx": 6856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940485985.601, "dur": 540.941, + "args": { + "External id": 988874,"Sequence number": 10552583, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 6857 + } + }, + { + "ph": "f", "id": 336, "pid": 2338708, "tid": 2379421, "ts": 6345940485985.601, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2379421, + "ts": 6345940486030.824, "dur": 107.455, + "args": { + "External id": 988875,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345940486032.657, "dur": 105.177, + "args": { + "External id": 988876,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940486036.373, "dur": 13.813, + "args": { + "External id": 988877,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 6860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940486044.952, "dur": 4.420, + "args": { + "External id": 988878,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940486051.422, "dur": 85.269, + "args": { + "External id": 988879,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940486157.521, "dur": 3.796, + "args": { + "External id": 988880,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940486158.562, "dur": 2.600, + "args": { + "External id": 988881,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940486169.097, "dur": 1.660, + "args": { + "External id": 988882,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940486169.737, "dur": 0.903, + "args": { + "External id": 988883,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940486186.282, "dur": 4.665, + "args": { + "External id": 988884,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940486203.670, "dur": 2.773, + "args": { + "External id": 988885,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940486401.449, "dur": 6.609, + "args": { + "External id": 988886,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 6869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940486412.731, "dur": 38.250, + "args": { + "External id": 988887,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 6870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940486423.988, "dur": 1.366, + "args": { + "External id": 988888,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 6871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940486457.757, "dur": 33.444, + "args": { + "External id": 988889,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 6872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940486459.725, "dur": 31.161, + "args": { + "External id": 988890,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 6873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940486464.586, "dur": 5.355, + "args": { + "External id": 988891,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940486471.621, "dur": 18.591, + "args": { + "External id": 988892,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 6875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345940486495.921, "dur": 5.000, + "args": { + "External id": 988893,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 6876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940486499.458, "dur": 1.268, + "args": { + "External id": 988894,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 6877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940486508.272, "dur": 2.375, + "args": { + "External id": 988895,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940486509.257, "dur": 1.271, + "args": { + "External id": 988896,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940486513.114, "dur": 2.130, + "args": { + "External id": 988897,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940486514.153, "dur": 0.987, + "args": { + "External id": 988898,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940486547.108, "dur": 20.452, + "args": { + "External id": 988899,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 6882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940486586.114, "dur": 13.240, + "args": { + "External id": 988900,"Record function id": 0, "Ev Idx": 6883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940486588.581, "dur": 9.880, + "args": { + "External id": 988901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940486591.361, "dur": 6.115, + "args": { + "External id": 988902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940486595.259, "dur": 2.081, + "args": { + "External id": 988903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 6886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940486603.823, "dur": 8.495, + "args": { + "External id": 988904,"Record function id": 0, "Sequence number": 10552582, "Fwd thread id": 1, "Ev Idx": 6887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940486604.883, "dur": 4.814, + "args": { + "External id": 988905,"Sequence number": 10552582, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6888 + } + }, + { + "ph": "f", "id": 337, "pid": 2338708, "tid": 2379421, "ts": 6345940486604.883, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940486606.743, "dur": 2.654, + "args": { + "External id": 988906,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940486607.774, "dur": 1.436, + "args": { + "External id": 988907,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940486616.542, "dur": 192.995, + "args": { + "External id": 988908,"Record function id": 0, "Sequence number": 10552581, "Fwd thread id": 1, "Ev Idx": 6891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940486617.619, "dur": 184.179, + "args": { + "External id": 988909,"Sequence number": 10552581, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6892 + } + }, + { + "ph": "f", "id": 338, "pid": 2338708, "tid": 2379421, "ts": 6345940486617.619, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940486622.208, "dur": 7.990, + "args": { + "External id": 988910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940486623.892, "dur": 5.645, + "args": { + "External id": 988911,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 6894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940486628.261, "dur": 0.982, + "args": { + "External id": 988912,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 6895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940486631.661, "dur": 92.277, + "args": { + "External id": 988913,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 6896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940486725.681, "dur": 6.598, + "args": { + "External id": 988914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940486726.702, "dur": 4.777, + "args": { + "External id": 988915,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940486728.509, "dur": 2.757, + "args": { + "External id": 988916,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940486734.068, "dur": 9.049, + "args": { + "External id": 988917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940486737.837, "dur": 4.654, + "args": { + "External id": 988918,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940486739.160, "dur": 3.239, + "args": { + "External id": 988919,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940486743.664, "dur": 57.218, + "args": { + "External id": 988920,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 6903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940486815.513, "dur": 8.537, + "args": { + "External id": 988921,"Record function id": 0, "Sequence number": 10552580, "Fwd thread id": 1, "Ev Idx": 6904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940486816.535, "dur": 5.001, + "args": { + "External id": 988922,"Sequence number": 10552580, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6905 + } + }, + { + "ph": "f", "id": 339, "pid": 2338708, "tid": 2379421, "ts": 6345940486816.535, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940486818.402, "dur": 2.948, + "args": { + "External id": 988923,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940486819.780, "dur": 1.408, + "args": { + "External id": 988924,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940486828.521, "dur": 10.765, + "args": { + "External id": 988925,"Record function id": 0, "Sequence number": 10552579, "Fwd thread id": 1, "Ev Idx": 6908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940486829.394, "dur": 7.301, + "args": { + "External id": 988926,"Sequence number": 10552579, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6909 + } + }, + { + "ph": "f", "id": 340, "pid": 2338708, "tid": 2379421, "ts": 6345940486829.394, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940486830.227, "dur": 6.235, + "args": { + "External id": 988927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 6910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940486833.460, "dur": 2.383, + "args": { + "External id": 988928,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 6911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940486834.984, "dur": 0.753, + "args": { + "External id": 988929,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 6912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940486844.360, "dur": 6.012, + "args": { + "External id": 988930,"Record function id": 0, "Ev Idx": 6913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940486845.842, "dur": 3.878, + "args": { + "External id": 988931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940486846.967, "dur": 2.475, + "args": { + "External id": 988932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940486847.727, "dur": 1.567, + "args": { + "External id": 988933,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 6916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940486854.261, "dur": 7.436, + "args": { + "External id": 988934,"Record function id": 0, "Sequence number": 10552578, "Fwd thread id": 1, "Ev Idx": 6917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940486855.123, "dur": 4.297, + "args": { + "External id": 988935,"Sequence number": 10552578, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 6918 + } + }, + { + "ph": "f", "id": 341, "pid": 2338708, "tid": 2379421, "ts": 6345940486855.123, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940486856.495, "dur": 2.754, + "args": { + "External id": 988936,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940486857.787, "dur": 1.282, + "args": { + "External id": 988937,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 6920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940486866.658, "dur": 516.668, + "args": { + "External id": 988938,"Record function id": 0, "Sequence number": 10552577, "Fwd thread id": 1, "Ev Idx": 6921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940486867.925, "dur": 493.363, + "args": { + "External id": 988939,"Sequence number": 10552577, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6922 + } + }, + { + "ph": "f", "id": 342, "pid": 2338708, "tid": 2379421, "ts": 6345940486867.925, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940486884.255, "dur": 7.358, + "args": { + "External id": 988940,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940486887.229, "dur": 3.886, + "args": { + "External id": 988941,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940486894.129, "dur": 3.807, + "args": { + "External id": 988942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940486895.727, "dur": 1.912, + "args": { + "External id": 988943,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940486899.611, "dur": 6.384, + "args": { + "External id": 988944,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940486901.215, "dur": 4.572, + "args": { + "External id": 988945,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940486933.093, "dur": 398.269, + "args": { + "External id": 988946,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 6929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940487051.432, "dur": 45.648, + "args": { + "External id": 988947,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940487100.910, "dur": 3.309, + "args": { + "External id": 988948,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940487105.617, "dur": 2.969, + "args": { + "External id": 988949,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940487110.016, "dur": 2.165, + "args": { + "External id": 988950,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940487202.865, "dur": 3.896, + "args": { + "External id": 988951,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940487204.570, "dur": 2.071, + "args": { + "External id": 988952,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940487211.289, "dur": 34.116, + "args": { + "External id": 988953,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940487218.368, "dur": 2.304, + "args": { + "External id": 988954,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940487247.109, "dur": 2.531, + "args": { + "External id": 988955,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940487248.822, "dur": 0.721, + "args": { + "External id": 988956,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940487250.763, "dur": 24.675, + "args": { + "External id": 988957,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 6940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940487253.569, "dur": 0.791, + "args": { + "External id": 988958,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 6941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940487346.389, "dur": 4.779, + "args": { + "External id": 988959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940487354.483, "dur": 0.851, + "args": { + "External id": 988960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940487357.587, "dur": 0.776, + "args": { + "External id": 988961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940487396.522, "dur": 271.759, + "args": { + "External id": 988962,"Record function id": 0, "Sequence number": 10552576, "Fwd thread id": 1, "Ev Idx": 6945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940487398.862, "dur": 261.185, + "args": { + "External id": 988963,"Sequence number": 10552576, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6946 + } + }, + { + "ph": "f", "id": 343, "pid": 2338708, "tid": 2379421, "ts": 6345940487398.862, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345940487423.119, "dur": 56.788, + "args": { + "External id": 988964,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940487427.544, "dur": 7.048, + "args": { + "External id": 988965,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940487436.427, "dur": 42.777, + "args": { + "External id": 988966,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 6949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940487491.224, "dur": 6.413, + "args": { + "External id": 988967,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 6950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940487494.343, "dur": 2.905, + "args": { + "External id": 988968,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940487677.114, "dur": 199.803, + "args": { + "External id": 988969,"Record function id": 0, "Sequence number": 10552575, "Fwd thread id": 1, "Ev Idx": 6952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940487678.955, "dur": 190.163, + "args": { + "External id": 988970,"Sequence number": 10552575, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6953 + } + }, + { + "ph": "f", "id": 344, "pid": 2338708, "tid": 2379421, "ts": 6345940487678.955, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345940487692.887, "dur": 57.471, + "args": { + "External id": 988971,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940487700.917, "dur": 3.282, + "args": { + "External id": 988972,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940487708.132, "dur": 41.719, + "args": { + "External id": 988973,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 6956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940487758.822, "dur": 5.505, + "args": { + "External id": 988974,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 6957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940487760.633, "dur": 3.434, + "args": { + "External id": 988975,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940487884.398, "dur": 17.783, + "args": { + "External id": 988976,"Record function id": 0, "Sequence number": 10552574, "Fwd thread id": 1, "Ev Idx": 6959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940487888.310, "dur": 10.832, + "args": { + "External id": 988977,"Sequence number": 10552574, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6960 + } + }, + { + "ph": "f", "id": 345, "pid": 2338708, "tid": 2379421, "ts": 6345940487888.310, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940487891.533, "dur": 7.230, + "args": { + "External id": 988978,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940487893.025, "dur": 5.443, + "args": { + "External id": 988979,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940487906.562, "dur": 9.538, + "args": { + "External id": 988980,"Record function id": 0, "Sequence number": 10552573, "Fwd thread id": 1, "Ev Idx": 6963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940487908.107, "dur": 4.440, + "args": { + "External id": 988981,"Sequence number": 10552573, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 6964 + } + }, + { + "ph": "f", "id": 346, "pid": 2338708, "tid": 2379421, "ts": 6345940487908.107, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940487909.778, "dur": 2.604, + "args": { + "External id": 988982,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940487911.218, "dur": 0.995, + "args": { + "External id": 988983,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 6966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940487921.929, "dur": 11.465, + "args": { + "External id": 988984,"Record function id": 0, "Sequence number": 10552572, "Fwd thread id": 1, "Ev Idx": 6967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940487924.093, "dur": 7.150, + "args": { + "External id": 988985,"Sequence number": 10552572, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 6968 + } + }, + { + "ph": "f", "id": 347, "pid": 2338708, "tid": 2379421, "ts": 6345940487924.093, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940487926.023, "dur": 5.056, + "args": { + "External id": 988986,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940487929.647, "dur": 1.263, + "args": { + "External id": 988987,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 6970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940487937.920, "dur": 7.072, + "args": { + "External id": 988988,"Record function id": 0, "Sequence number": 10552571, "Fwd thread id": 1, "Ev Idx": 6971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940487939.285, "dur": 3.835, + "args": { + "External id": 988989,"Sequence number": 10552571, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 6972 + } + }, + { + "ph": "f", "id": 348, "pid": 2338708, "tid": 2379421, "ts": 6345940487939.285, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940487940.672, "dur": 2.293, + "args": { + "External id": 988990,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940487941.807, "dur": 1.046, + "args": { + "External id": 988991,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 6974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940487948.789, "dur": 261.808, + "args": { + "External id": 988992,"Record function id": 0, "Sequence number": 10552570, "Fwd thread id": 1, "Ev Idx": 6975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940487949.679, "dur": 247.934, + "args": { + "External id": 988993,"Sequence number": 10552570, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6976 + } + }, + { + "ph": "f", "id": 349, "pid": 2338708, "tid": 2379421, "ts": 6345940487949.679, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940487953.656, "dur": 11.510, + "args": { + "External id": 988994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 6977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940487956.119, "dur": 8.252, + "args": { + "External id": 988995,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 6978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940487960.532, "dur": 3.505, + "args": { + "External id": 988996,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 6979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940487967.074, "dur": 149.813, + "args": { + "External id": 988997,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 6980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940488120.536, "dur": 7.859, + "args": { + "External id": 988998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940488122.218, "dur": 4.705, + "args": { + "External id": 988999,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 6982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488124.808, "dur": 1.923, + "args": { + "External id": 989000,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 6983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940488130.761, "dur": 6.294, + "args": { + "External id": 989001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940488131.859, "dur": 4.639, + "args": { + "External id": 989002,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488135.818, "dur": 0.579, + "args": { + "External id": 989003,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940488138.042, "dur": 58.371, + "args": { + "External id": 989004,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 6987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488220.132, "dur": 9.387, + "args": { + "External id": 989005,"Record function id": 0, "Sequence number": 10552569, "Fwd thread id": 1, "Ev Idx": 6988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488221.729, "dur": 5.609, + "args": { + "External id": 989006,"Sequence number": 10552569, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 6989 + } + }, + { + "ph": "f", "id": 350, "pid": 2338708, "tid": 2379421, "ts": 6345940488221.729, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940488223.994, "dur": 3.186, + "args": { + "External id": 989007,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940488225.146, "dur": 1.836, + "args": { + "External id": 989008,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 6991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488233.462, "dur": 10.530, + "args": { + "External id": 989009,"Record function id": 0, "Sequence number": 10552568, "Fwd thread id": 1, "Ev Idx": 6992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488234.385, "dur": 7.605, + "args": { + "External id": 989010,"Sequence number": 10552568, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6993 + } + }, + { + "ph": "f", "id": 351, "pid": 2338708, "tid": 2379421, "ts": 6345940488234.385, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940488235.273, "dur": 6.489, + "args": { + "External id": 989011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 6994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940488236.136, "dur": 5.032, + "args": { + "External id": 989012,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 6995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488240.419, "dur": 0.624, + "args": { + "External id": 989013,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 6996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940488250.809, "dur": 14.860, + "args": { + "External id": 989014,"Record function id": 0, "Ev Idx": 6997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940488253.010, "dur": 11.657, + "args": { + "External id": 989015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940488256.314, "dur": 7.909, + "args": { + "External id": 989016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 6999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940488258.050, "dur": 6.057, + "args": { + "External id": 989017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488269.729, "dur": 7.866, + "args": { + "External id": 989018,"Record function id": 0, "Sequence number": 10552567, "Fwd thread id": 1, "Ev Idx": 7001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488271.777, "dur": 3.797, + "args": { + "External id": 989019,"Sequence number": 10552567, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7002 + } + }, + { + "ph": "f", "id": 352, "pid": 2338708, "tid": 2379421, "ts": 6345940488271.777, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940488272.801, "dur": 2.597, + "args": { + "External id": 989020,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940488274.067, "dur": 1.136, + "args": { + "External id": 989021,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488281.781, "dur": 105.008, + "args": { + "External id": 989022,"Record function id": 0, "Sequence number": 10552566, "Fwd thread id": 1, "Ev Idx": 7005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488284.968, "dur": 94.409, + "args": { + "External id": 989023,"Sequence number": 10552566, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7006 + } + }, + { + "ph": "f", "id": 353, "pid": 2338708, "tid": 2379421, "ts": 6345940488284.968, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940488287.623, "dur": 3.157, + "args": { + "External id": 989024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940488288.325, "dur": 1.946, + "args": { + "External id": 989025,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488289.554, "dur": 0.589, + "args": { + "External id": 989026,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940488291.678, "dur": 33.660, + "args": { + "External id": 989027,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940488326.910, "dur": 6.513, + "args": { + "External id": 989028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940488327.603, "dur": 5.206, + "args": { + "External id": 989029,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488331.346, "dur": 1.308, + "args": { + "External id": 989030,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940488334.818, "dur": 3.028, + "args": { + "External id": 989031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940488335.591, "dur": 1.775, + "args": { + "External id": 989032,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488336.666, "dur": 0.615, + "args": { + "External id": 989033,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940488338.400, "dur": 40.033, + "args": { + "External id": 989034,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488392.307, "dur": 41.970, + "args": { + "External id": 989035,"Record function id": 0, "Sequence number": 10552565, "Fwd thread id": 1, "Ev Idx": 7018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488393.595, "dur": 7.081, + "args": { + "External id": 989036,"Sequence number": 10552565, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7019 + } + }, + { + "ph": "f", "id": 354, "pid": 2338708, "tid": 2379421, "ts": 6345940488393.595, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940488397.920, "dur": 2.578, + "args": { + "External id": 989037,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940488399.127, "dur": 1.231, + "args": { + "External id": 989038,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345940488404.331, "dur": 27.311, + "args": { + "External id": 989039,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488438.904, "dur": 8.734, + "args": { + "External id": 989040,"Record function id": 0, "Sequence number": 10552564, "Fwd thread id": 1, "Ev Idx": 7023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488440.101, "dur": 5.577, + "args": { + "External id": 989041,"Sequence number": 10552564, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7024 + } + }, + { + "ph": "f", "id": 355, "pid": 2338708, "tid": 2379421, "ts": 6345940488440.101, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940488440.954, "dur": 4.502, + "args": { + "External id": 989042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940488442.020, "dur": 2.762, + "args": { + "External id": 989043,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488443.629, "dur": 1.011, + "args": { + "External id": 989044,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940488452.387, "dur": 8.338, + "args": { + "External id": 989045,"Record function id": 0, "Ev Idx": 7028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940488454.099, "dur": 6.048, + "args": { + "External id": 989046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940488455.207, "dur": 4.603, + "args": { + "External id": 989047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940488458.590, "dur": 1.097, + "args": { + "External id": 989048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488464.398, "dur": 6.399, + "args": { + "External id": 989049,"Record function id": 0, "Sequence number": 10552563, "Fwd thread id": 1, "Ev Idx": 7032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488465.580, "dur": 3.539, + "args": { + "External id": 989050,"Sequence number": 10552563, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7033 + } + }, + { + "ph": "f", "id": 356, "pid": 2338708, "tid": 2379421, "ts": 6345940488465.580, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940488466.623, "dur": 2.326, + "args": { + "External id": 989051,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940488467.733, "dur": 1.049, + "args": { + "External id": 989052,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488474.549, "dur": 108.618, + "args": { + "External id": 989053,"Record function id": 0, "Sequence number": 10552562, "Fwd thread id": 1, "Ev Idx": 7036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488475.531, "dur": 99.772, + "args": { + "External id": 989054,"Sequence number": 10552562, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7037 + } + }, + { + "ph": "f", "id": 357, "pid": 2338708, "tid": 2379421, "ts": 6345940488475.531, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940488477.914, "dur": 5.213, + "args": { + "External id": 989055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940488480.833, "dur": 1.782, + "args": { + "External id": 989056,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488482.038, "dur": 0.445, + "args": { + "External id": 989057,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940488483.671, "dur": 39.047, + "args": { + "External id": 989058,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940488524.406, "dur": 4.098, + "args": { + "External id": 989059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940488525.033, "dur": 2.823, + "args": { + "External id": 989060,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488526.680, "dur": 1.042, + "args": { + "External id": 989061,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940488532.021, "dur": 3.360, + "args": { + "External id": 989062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940488533.090, "dur": 1.768, + "args": { + "External id": 989063,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488534.149, "dur": 0.619, + "args": { + "External id": 989064,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940488535.952, "dur": 38.435, + "args": { + "External id": 989065,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488588.790, "dur": 27.468, + "args": { + "External id": 989066,"Record function id": 0, "Sequence number": 10552561, "Fwd thread id": 1, "Ev Idx": 7049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488589.761, "dur": 4.673, + "args": { + "External id": 989067,"Sequence number": 10552561, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7050 + } + }, + { + "ph": "f", "id": 358, "pid": 2338708, "tid": 2379421, "ts": 6345940488589.761, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940488591.644, "dur": 2.625, + "args": { + "External id": 989068,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940488592.837, "dur": 1.283, + "args": { + "External id": 989069,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940488597.242, "dur": 16.416, + "args": { + "External id": 989070,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488620.596, "dur": 10.277, + "args": { + "External id": 989071,"Record function id": 0, "Sequence number": 10552560, "Fwd thread id": 1, "Ev Idx": 7054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940488623.862, "dur": 4.824, + "args": { + "External id": 989072,"Sequence number": 10552560, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7055 + } + }, + { + "ph": "f", "id": 359, "pid": 2338708, "tid": 2379421, "ts": 6345940488623.862, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940488624.714, "dur": 3.773, + "args": { + "External id": 989073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940488625.462, "dur": 2.434, + "args": { + "External id": 989074,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488626.988, "dur": 0.758, + "args": { + "External id": 989075,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940488635.448, "dur": 5.946, + "args": { + "External id": 989076,"Record function id": 0, "Ev Idx": 7059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940488637.092, "dur": 3.730, + "args": { + "External id": 989077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940488638.119, "dur": 2.153, + "args": { + "External id": 989078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940488639.001, "dur": 1.159, + "args": { + "External id": 989079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940488646.001, "dur": 475.317, + "args": { + "External id": 989080,"Record function id": 0, "Sequence number": 10552559, "Fwd thread id": 1, "Ev Idx": 7063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940488647.521, "dur": 388.354, + "args": { + "External id": 989081,"Sequence number": 10552559, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7064 + } + }, + { + "ph": "f", "id": 360, "pid": 2338708, "tid": 2379421, "ts": 6345940488647.521, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940488684.802, "dur": 4.600, + "args": { + "External id": 989082,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940488687.923, "dur": 1.345, + "args": { + "External id": 989083,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940488705.561, "dur": 4.467, + "args": { + "External id": 989084,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940488720.488, "dur": 2.773, + "args": { + "External id": 989085,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940488895.708, "dur": 2.718, + "args": { + "External id": 989086,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940488902.938, "dur": 41.472, + "args": { + "External id": 989087,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488918.562, "dur": 0.965, + "args": { + "External id": 989088,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940488951.166, "dur": 35.324, + "args": { + "External id": 989089,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940488953.357, "dur": 32.831, + "args": { + "External id": 989090,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940488960.396, "dur": 4.950, + "args": { + "External id": 989091,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940488967.102, "dur": 18.535, + "args": { + "External id": 989092,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345940488991.708, "dur": 2.752, + "args": { + "External id": 989093,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940488993.114, "dur": 1.154, + "args": { + "External id": 989094,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940489001.673, "dur": 2.832, + "args": { + "External id": 989095,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940489003.108, "dur": 1.274, + "args": { + "External id": 989096,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940489052.925, "dur": 61.332, + "args": { + "External id": 989097,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940489138.352, "dur": 11.830, + "args": { + "External id": 989098,"Record function id": 0, "Ev Idx": 7081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940489140.829, "dur": 8.236, + "args": { + "External id": 989099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940489143.891, "dur": 3.351, + "args": { + "External id": 989100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940489144.758, "dur": 2.375, + "args": { + "External id": 989101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489155.222, "dur": 7.348, + "args": { + "External id": 989102,"Record function id": 0, "Sequence number": 10552558, "Fwd thread id": 1, "Ev Idx": 7085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489157.082, "dur": 1.611, + "args": { + "External id": 989103,"Sequence number": 10552558, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7086 + } + }, + { + "ph": "f", "id": 361, "pid": 2338708, "tid": 2379421, "ts": 6345940489157.082, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940489167.202, "dur": 477.427, + "args": { + "External id": 989104,"Record function id": 0, "Sequence number": 10552557, "Fwd thread id": 1, "Ev Idx": 7087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940489168.569, "dur": 464.689, + "args": { + "External id": 989105,"Sequence number": 10552557, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7088 + } + }, + { + "ph": "f", "id": 362, "pid": 2338708, "tid": 2379421, "ts": 6345940489168.569, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940489208.768, "dur": 9.850, + "args": { + "External id": 989106,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940489214.269, "dur": 3.948, + "args": { + "External id": 989107,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]", "[16384, 1]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[67108864, 16384, 1], [], []], "Input Dims": [[8, 4096, 4096], [], []], "Ev Idx": 7090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940489222.663, "dur": 7.026, + "args": { + "External id": 989108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940489224.668, "dur": 4.190, + "args": { + "External id": 989109,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489227.157, "dur": 1.484, + "args": { + "External id": 989110,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2379421, + "ts": 6345940489236.108, "dur": 108.350, + "args": { + "External id": 989111,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16384, 1], [1, 14336], []], "Input Dims": [[32768, 4096], [14336, 4096], []], "Ev Idx": 7094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940489237.036, "dur": 3.502, + "args": { + "External id": 989112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 14336]], "Input Dims": [[14336, 4096]], "Ev Idx": 7095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940489237.762, "dur": 2.146, + "args": { + "External id": 989113,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 14336], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489239.204, "dur": 0.551, + "args": { + "External id": 989114,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[14336, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 14336], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2379421, + "ts": 6345940489241.913, "dur": 101.739, + "args": { + "External id": 989115,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 7098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940489243.337, "dur": 99.356, + "args": { + "External id": 989116,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16384, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 7099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345940489349.356, "dur": 6.499, + "args": { + "External id": 989117,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [58720256, 14336, 1]], "Input Dims": [[32768, 14336], [8, 4096, 14336]], "Ev Idx": 7100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940489353.803, "dur": 1.926, + "args": { + "External id": 989118,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 7101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940489393.177, "dur": 7.111, + "args": { + "External id": 989119,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940489401.908, "dur": 3.391, + "args": { + "External id": 989120,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940489406.340, "dur": 3.149, + "args": { + "External id": 989121,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940489447.697, "dur": 2.741, + "args": { + "External id": 989122,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940489448.677, "dur": 1.550, + "args": { + "External id": 989123,"Record function id": 0, "Concrete Inputs": ["", "[-1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::einsum", "pid": 2338708, "tid": 2379421, + "ts": 6345940489476.417, "dur": 134.071, + "args": { + "External id": 989124,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["", "TensorList", ""], "Input Strides": [[], [[16384, 1], [14336, 1]], []], "Input Dims": [[], [[32768, 4096], [32768, 14336]], []], "Ev Idx": 7107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345940489482.343, "dur": 6.584, + "args": { + "External id": 989125,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489487.015, "dur": 0.919, + "args": { + "External id": 989126,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096, 1]", "[16384, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940489490.733, "dur": 7.496, + "args": { + "External id": 989127,"Record function id": 0, "Concrete Inputs": ["", "[1, 2, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16384, 1, 1], []], "Input Dims": [[32768, 4096, 1], []], "Ev Idx": 7110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489496.555, "dur": 0.793, + "args": { + "External id": 989128,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 32768]", "[1, 1, 16384]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[16384, 1, 1], [], [], []], "Input Dims": [[32768, 4096, 1], [], [], []], "Ev Idx": 7111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2379421, + "ts": 6345940489499.850, "dur": 2.943, + "args": { + "External id": 989129,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 7112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489502.043, "dur": 0.311, + "args": { + "External id": 989130,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940489503.858, "dur": 3.026, + "args": { + "External id": 989131,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489505.637, "dur": 0.625, + "args": { + "External id": 989132,"Record function id": 0, "Concrete Inputs": ["", "[1, 14336, 32768]", "[1, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1, 1], [], [], []], "Input Dims": [[32768, 14336, 1], [], [], []], "Ev Idx": 7115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940489511.155, "dur": 5.807, + "args": { + "External id": 989133,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 16384], []], "Input Dims": [[4096, 1, 32768], []], "Ev Idx": 7116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489513.441, "dur": 3.137, + "args": { + "External id": 989134,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768, 1]", "[1, 16384, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 16384], [], [], []], "Input Dims": [[4096, 1, 32768], [], [], []], "Ev Idx": 7117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940489518.593, "dur": 6.731, + "args": { + "External id": 989135,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 16384, 1], []], "Input Dims": [[4096, 32768, 1], []], "Ev Idx": 7118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940489522.885, "dur": 2.220, + "args": { + "External id": 989136,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096, 32768]", "[4096, 1, 16384]"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList"], "Input Strides": [[1, 16384, 1], [], []], "Input Dims": [[4096, 32768, 1], [], []], "Ev Idx": 7119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940489528.848, "dur": 3.020, + "args": { + "External id": 989137,"Record function id": 0, "Concrete Inputs": ["", "[2, 1, 0]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1, 1, 14336], []], "Input Dims": [[1, 14336, 32768], []], "Ev Idx": 7120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489531.125, "dur": 0.390, + "args": { + "External id": 989138,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336, 1]", "[14336, 1, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1, 14336], [], [], []], "Input Dims": [[1, 14336, 32768], [], [], []], "Ev Idx": 7121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940489532.930, "dur": 2.869, + "args": { + "External id": 989139,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940489533.803, "dur": 1.885, + "args": { + "External id": 989140,"Record function id": 0, "Concrete Inputs": ["", "[1, 32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 1], []], "Input Dims": [[32768, 14336, 1], []], "Ev Idx": 7123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345940489537.130, "dur": 59.800, + "args": { + "External id": 989141,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1, 16384], [469762048, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336]], "Ev Idx": 7124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940489599.152, "dur": 1.486, + "args": { + "External id": 989142,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[1, 4096, 14336], []], "Ev Idx": 7125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::permute", "pid": 2338708, "tid": 2379421, + "ts": 6345940489601.637, "dur": 3.799, + "args": { + "External id": 989143,"Record function id": 0, "Concrete Inputs": ["", "[0, 2, 1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 14336, 1], []], "Input Dims": [[4096, 1, 14336], []], "Ev Idx": 7126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489604.067, "dur": 0.658, + "args": { + "External id": 989144,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336, 1]", "[14336, 1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 14336, 1], [], [], []], "Input Dims": [[4096, 1, 14336], [], [], []], "Ev Idx": 7127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940489608.263, "dur": 0.991, + "args": { + "External id": 989145,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1, 14336], []], "Input Dims": [[4096, 14336, 1], []], "Ev Idx": 7128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940489655.451, "dur": 12.123, + "args": { + "External id": 989146,"Record function id": 0, "Ev Idx": 7129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940489657.628, "dur": 9.143, + "args": { + "External id": 989147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940489659.628, "dur": 5.894, + "args": { + "External id": 989148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940489660.692, "dur": 4.683, + "args": { + "External id": 989149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489672.411, "dur": 7.200, + "args": { + "External id": 989150,"Record function id": 0, "Sequence number": 10552556, "Fwd thread id": 1, "Ev Idx": 7133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489673.583, "dur": 4.177, + "args": { + "External id": 989151,"Sequence number": 10552556, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 7134 + } + }, + { + "ph": "f", "id": 363, "pid": 2338708, "tid": 2379421, "ts": 6345940489673.583, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940489675.961, "dur": 1.614, + "args": { + "External id": 989152,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940489676.648, "dur": 0.750, + "args": { + "External id": 989153,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489684.176, "dur": 120.722, + "args": { + "External id": 989154,"Record function id": 0, "Sequence number": 10552555, "Fwd thread id": 1, "Ev Idx": 7137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489685.098, "dur": 112.280, + "args": { + "External id": 989155,"Sequence number": 10552555, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7138 + } + }, + { + "ph": "f", "id": 364, "pid": 2338708, "tid": 2379421, "ts": 6345940489685.098, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940489690.663, "dur": 4.749, + "args": { + "External id": 989156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940489692.121, "dur": 2.597, + "args": { + "External id": 989157,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 7140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489693.833, "dur": 0.702, + "args": { + "External id": 989158,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940489696.396, "dur": 46.683, + "args": { + "External id": 989159,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 7142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940489744.571, "dur": 6.494, + "args": { + "External id": 989160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940489745.357, "dur": 4.956, + "args": { + "External id": 989161,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489749.138, "dur": 0.961, + "args": { + "External id": 989162,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940489752.978, "dur": 3.653, + "args": { + "External id": 989163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940489754.102, "dur": 2.009, + "args": { + "External id": 989164,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489755.550, "dur": 0.434, + "args": { + "External id": 989165,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940489757.341, "dur": 39.018, + "args": { + "External id": 989166,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 7149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489810.575, "dur": 7.716, + "args": { + "External id": 989167,"Record function id": 0, "Sequence number": 10552554, "Fwd thread id": 1, "Ev Idx": 7150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489811.768, "dur": 4.523, + "args": { + "External id": 989168,"Sequence number": 10552554, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7151 + } + }, + { + "ph": "f", "id": 365, "pid": 2338708, "tid": 2379421, "ts": 6345940489811.768, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940489813.884, "dur": 2.205, + "args": { + "External id": 989169,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940489814.734, "dur": 1.179, + "args": { + "External id": 989170,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489822.543, "dur": 12.313, + "args": { + "External id": 989171,"Record function id": 0, "Sequence number": 10552553, "Fwd thread id": 1, "Ev Idx": 7154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489825.830, "dur": 6.942, + "args": { + "External id": 989172,"Sequence number": 10552553, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7155 + } + }, + { + "ph": "f", "id": 366, "pid": 2338708, "tid": 2379421, "ts": 6345940489825.830, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940489826.438, "dur": 6.085, + "args": { + "External id": 989173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940489827.392, "dur": 4.571, + "args": { + "External id": 989174,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489829.145, "dur": 2.688, + "args": { + "External id": 989175,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940489839.929, "dur": 5.260, + "args": { + "External id": 989176,"Record function id": 0, "Ev Idx": 7159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940489841.360, "dur": 3.231, + "args": { + "External id": 989177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940489842.504, "dur": 1.730, + "args": { + "External id": 989178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940489842.968, "dur": 1.154, + "args": { + "External id": 989179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489848.825, "dur": 8.443, + "args": { + "External id": 989180,"Record function id": 0, "Sequence number": 10552552, "Fwd thread id": 1, "Ev Idx": 7163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489849.723, "dur": 5.414, + "args": { + "External id": 989181,"Sequence number": 10552552, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[58720256, 14336, 1]], "Input Dims": [[8, 4096, 14336]], "Ev Idx": 7164 + } + }, + { + "ph": "f", "id": 367, "pid": 2338708, "tid": 2379421, "ts": 6345940489849.723, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940489850.844, "dur": 4.114, + "args": { + "External id": 989182,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940489854.098, "dur": 0.745, + "args": { + "External id": 989183,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 7166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489861.218, "dur": 141.764, + "args": { + "External id": 989184,"Record function id": 0, "Sequence number": 10552551, "Fwd thread id": 1, "Ev Idx": 7167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940489894.378, "dur": 100.320, + "args": { + "External id": 989185,"Sequence number": 10552551, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7168 + } + }, + { + "ph": "f", "id": 368, "pid": 2338708, "tid": 2379421, "ts": 6345940489894.378, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940489896.918, "dur": 3.579, + "args": { + "External id": 989186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[32768, 14336]], "Ev Idx": 7169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940489897.618, "dur": 2.328, + "args": { + "External id": 989187,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[32768, 14336], [], []], "Ev Idx": 7170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489899.115, "dur": 0.714, + "args": { + "External id": 989188,"Record function id": 0, "Concrete Inputs": ["", "[14336, 32768]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[32768, 14336], [], [], []], "Ev Idx": 7171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940489903.942, "dur": 38.844, + "args": { + "External id": 989189,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096]], "Ev Idx": 7172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940489944.254, "dur": 5.484, + "args": { + "External id": 989190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940489944.738, "dur": 4.350, + "args": { + "External id": 989191,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 7174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489946.165, "dur": 2.733, + "args": { + "External id": 989192,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 7175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940489951.068, "dur": 5.800, + "args": { + "External id": 989193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940489952.155, "dur": 4.053, + "args": { + "External id": 989194,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940489955.742, "dur": 0.369, + "args": { + "External id": 989195,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940489957.353, "dur": 36.235, + "args": { + "External id": 989196,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 7179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490031.767, "dur": 88.118, + "args": { + "External id": 989197,"Record function id": 0, "Sequence number": 10552550, "Fwd thread id": 1, "Ev Idx": 7180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490033.870, "dur": 8.922, + "args": { + "External id": 989198,"Sequence number": 10552550, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7181 + } + }, + { + "ph": "f", "id": 369, "pid": 2338708, "tid": 2379421, "ts": 6345940490033.870, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940490037.009, "dur": 5.576, + "args": { + "External id": 989199,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940490038.155, "dur": 4.108, + "args": { + "External id": 989200,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345940490046.756, "dur": 68.154, + "args": { + "External id": 989201,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490127.659, "dur": 13.395, + "args": { + "External id": 989202,"Record function id": 0, "Sequence number": 10552549, "Fwd thread id": 1, "Ev Idx": 7185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490129.168, "dur": 9.185, + "args": { + "External id": 989203,"Sequence number": 10552549, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7186 + } + }, + { + "ph": "f", "id": 370, "pid": 2338708, "tid": 2379421, "ts": 6345940490129.168, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940490130.441, "dur": 7.663, + "args": { + "External id": 989204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 14336]], "Ev Idx": 7187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940490133.985, "dur": 3.147, + "args": { + "External id": 989205,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 7188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940490136.007, "dur": 0.823, + "args": { + "External id": 989206,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 7189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940490146.318, "dur": 6.971, + "args": { + "External id": 989207,"Record function id": 0, "Ev Idx": 7190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940490147.998, "dur": 4.681, + "args": { + "External id": 989208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940490149.614, "dur": 2.498, + "args": { + "External id": 989209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940490150.219, "dur": 1.717, + "args": { + "External id": 989210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940490158.203, "dur": 470.697, + "args": { + "External id": 989211,"Record function id": 0, "Sequence number": 10552548, "Fwd thread id": 1, "Ev Idx": 7194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940490163.315, "dur": 426.569, + "args": { + "External id": 989212,"Sequence number": 10552548, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 7195 + } + }, + { + "ph": "f", "id": 371, "pid": 2338708, "tid": 2379421, "ts": 6345940490163.315, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2379421, + "ts": 6345940490192.028, "dur": 37.600, + "args": { + "External id": 989213,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345940490193.731, "dur": 35.634, + "args": { + "External id": 989214,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940490196.696, "dur": 7.670, + "args": { + "External id": 989215,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], [], []], "Ev Idx": 7198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940490199.742, "dur": 3.886, + "args": { + "External id": 989216,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940490205.881, "dur": 22.951, + "args": { + "External id": 989217,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940490243.550, "dur": 4.882, + "args": { + "External id": 989218,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940490246.685, "dur": 1.578, + "args": { + "External id": 989219,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940490253.627, "dur": 1.940, + "args": { + "External id": 989220,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940490254.315, "dur": 1.128, + "args": { + "External id": 989221,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940490269.816, "dur": 2.417, + "args": { + "External id": 989222,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940490286.894, "dur": 4.543, + "args": { + "External id": 989223,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940490471.376, "dur": 4.359, + "args": { + "External id": 989224,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940490480.680, "dur": 36.278, + "args": { + "External id": 989225,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940490492.326, "dur": 0.834, + "args": { + "External id": 989226,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940490523.795, "dur": 30.908, + "args": { + "External id": 989227,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940490525.679, "dur": 28.659, + "args": { + "External id": 989228,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940490531.066, "dur": 4.860, + "args": { + "External id": 989229,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940490537.333, "dur": 16.482, + "args": { + "External id": 989230,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345940490561.624, "dur": 2.910, + "args": { + "External id": 989231,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940490563.104, "dur": 1.270, + "args": { + "External id": 989232,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940490572.093, "dur": 2.855, + "args": { + "External id": 989233,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940490573.323, "dur": 1.440, + "args": { + "External id": 989234,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940490577.420, "dur": 2.776, + "args": { + "External id": 989235,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940490578.783, "dur": 1.307, + "args": { + "External id": 989236,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940490608.605, "dur": 18.427, + "args": { + "External id": 989237,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940490641.649, "dur": 8.967, + "args": { + "External id": 989238,"Record function id": 0, "Ev Idx": 7221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940490644.215, "dur": 5.707, + "args": { + "External id": 989239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940490646.267, "dur": 2.643, + "args": { + "External id": 989240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940490647.235, "dur": 1.567, + "args": { + "External id": 989241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490654.679, "dur": 8.426, + "args": { + "External id": 989242,"Record function id": 0, "Sequence number": 10552547, "Fwd thread id": 1, "Ev Idx": 7225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490656.208, "dur": 4.493, + "args": { + "External id": 989243,"Sequence number": 10552547, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7226 + } + }, + { + "ph": "f", "id": 372, "pid": 2338708, "tid": 2379421, "ts": 6345940490656.208, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940490657.980, "dur": 2.478, + "args": { + "External id": 989244,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940490659.009, "dur": 1.281, + "args": { + "External id": 989245,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490667.049, "dur": 160.968, + "args": { + "External id": 989246,"Record function id": 0, "Sequence number": 10552546, "Fwd thread id": 1, "Ev Idx": 7229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490668.000, "dur": 153.287, + "args": { + "External id": 989247,"Sequence number": 10552546, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7230 + } + }, + { + "ph": "f", "id": 373, "pid": 2338708, "tid": 2379421, "ts": 6345940490668.000, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940490674.133, "dur": 4.835, + "args": { + "External id": 989248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940490675.360, "dur": 2.919, + "args": { + "External id": 989249,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940490677.162, "dur": 0.889, + "args": { + "External id": 989250,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940490679.948, "dur": 77.253, + "args": { + "External id": 989251,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940490758.648, "dur": 8.811, + "args": { + "External id": 989252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940490759.461, "dur": 7.183, + "args": { + "External id": 989253,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940490761.574, "dur": 4.874, + "args": { + "External id": 989254,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940490771.514, "dur": 3.809, + "args": { + "External id": 989255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940490772.840, "dur": 1.957, + "args": { + "External id": 989256,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940490774.149, "dur": 0.556, + "args": { + "External id": 989257,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940490775.953, "dur": 44.095, + "args": { + "External id": 989258,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490833.584, "dur": 11.276, + "args": { + "External id": 989259,"Record function id": 0, "Sequence number": 10552545, "Fwd thread id": 1, "Ev Idx": 7242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490834.676, "dur": 8.129, + "args": { + "External id": 989260,"Sequence number": 10552545, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7243 + } + }, + { + "ph": "f", "id": 374, "pid": 2338708, "tid": 2379421, "ts": 6345940490834.676, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940490837.091, "dur": 5.550, + "args": { + "External id": 989261,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940490841.128, "dur": 1.327, + "args": { + "External id": 989262,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490849.034, "dur": 7.389, + "args": { + "External id": 989263,"Record function id": 0, "Sequence number": 10552544, "Fwd thread id": 1, "Ev Idx": 7246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490849.899, "dur": 4.835, + "args": { + "External id": 989264,"Sequence number": 10552544, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7247 + } + }, + { + "ph": "f", "id": 375, "pid": 2338708, "tid": 2379421, "ts": 6345940490849.899, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940490850.840, "dur": 3.628, + "args": { + "External id": 989265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940490851.555, "dur": 2.323, + "args": { + "External id": 989266,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940490853.145, "dur": 0.559, + "args": { + "External id": 989267,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940490860.974, "dur": 6.087, + "args": { + "External id": 989268,"Record function id": 0, "Ev Idx": 7251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940490862.525, "dur": 3.934, + "args": { + "External id": 989269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940490863.866, "dur": 2.273, + "args": { + "External id": 989270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940490864.568, "dur": 1.457, + "args": { + "External id": 989271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490870.677, "dur": 9.308, + "args": { + "External id": 989272,"Record function id": 0, "Sequence number": 10552543, "Fwd thread id": 1, "Ev Idx": 7255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940490874.211, "dur": 3.869, + "args": { + "External id": 989273,"Sequence number": 10552543, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7256 + } + }, + { + "ph": "f", "id": 376, "pid": 2338708, "tid": 2379421, "ts": 6345940490874.211, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940490875.451, "dur": 2.443, + "args": { + "External id": 989274,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940490876.387, "dur": 1.394, + "args": { + "External id": 989275,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940490885.176, "dur": 475.277, + "args": { + "External id": 989276,"Record function id": 0, "Sequence number": 10552542, "Fwd thread id": 1, "Ev Idx": 7259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFuncBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940490886.879, "dur": 451.621, + "args": { + "External id": 989277,"Sequence number": 10552542, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7260 + } + }, + { + "ph": "f", "id": 377, "pid": 2338708, "tid": 2379421, "ts": 6345940490886.879, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940490902.356, "dur": 7.686, + "args": { + "External id": 989278,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 7261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940490905.574, "dur": 3.965, + "args": { + "External id": 989279,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940490912.459, "dur": 6.923, + "args": { + "External id": 989280,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940490914.887, "dur": 4.270, + "args": { + "External id": 989281,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940490921.150, "dur": 4.418, + "args": { + "External id": 989282,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940490923.520, "dur": 1.841, + "args": { + "External id": 989283,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940490953.740, "dur": 354.692, + "args": { + "External id": 989284,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940491110.168, "dur": 6.437, + "args": { + "External id": 989285,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940491119.479, "dur": 3.428, + "args": { + "External id": 989286,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940491126.998, "dur": 2.014, + "args": { + "External id": 989287,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940491130.369, "dur": 2.549, + "args": { + "External id": 989288,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940491186.679, "dur": 4.019, + "args": { + "External id": 989289,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940491188.355, "dur": 2.236, + "args": { + "External id": 989290,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940491192.571, "dur": 34.091, + "args": { + "External id": 989291,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940491199.439, "dur": 2.344, + "args": { + "External id": 989292,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940491228.251, "dur": 1.826, + "args": { + "External id": 989293,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940491229.251, "dur": 0.742, + "args": { + "External id": 989294,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940491231.189, "dur": 20.560, + "args": { + "External id": 989295,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940491233.429, "dur": 3.006, + "args": { + "External id": 989296,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940491323.510, "dur": 4.937, + "args": { + "External id": 989297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940491331.862, "dur": 0.747, + "args": { + "External id": 989298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2379421, + "ts": 6345940491334.878, "dur": 0.609, + "args": { + "External id": 989299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940491373.007, "dur": 253.658, + "args": { + "External id": 989300,"Record function id": 0, "Sequence number": 10552541, "Fwd thread id": 1, "Ev Idx": 7283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940491375.142, "dur": 243.442, + "args": { + "External id": 989301,"Sequence number": 10552541, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7284 + } + }, + { + "ph": "f", "id": 378, "pid": 2338708, "tid": 2379421, "ts": 6345940491375.142, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345940491399.829, "dur": 49.348, + "args": { + "External id": 989302,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940491404.462, "dur": 4.261, + "args": { + "External id": 989303,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940491410.363, "dur": 38.206, + "args": { + "External id": 989304,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], [8, 4096, 8, 128], []], "Ev Idx": 7287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940491460.473, "dur": 5.906, + "args": { + "External id": 989305,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 7288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940491463.216, "dur": 2.774, + "args": { + "External id": 989306,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940491635.210, "dur": 179.699, + "args": { + "External id": 989307,"Record function id": 0, "Sequence number": 10552540, "Fwd thread id": 1, "Ev Idx": 7290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940491637.245, "dur": 170.901, + "args": { + "External id": 989308,"Sequence number": 10552540, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7291 + } + }, + { + "ph": "f", "id": 379, "pid": 2338708, "tid": 2379421, "ts": 6345940491637.245, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2379421, + "ts": 6345940491650.684, "dur": 42.750, + "args": { + "External id": 989309,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940491654.280, "dur": 3.128, + "args": { + "External id": 989310,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940491658.556, "dur": 34.339, + "args": { + "External id": 989311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], []], "Ev Idx": 7294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2379421, + "ts": 6345940491701.738, "dur": 6.291, + "args": { + "External id": 989312,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 7295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940491704.579, "dur": 3.100, + "args": { + "External id": 989313,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940491823.714, "dur": 15.510, + "args": { + "External id": 989314,"Record function id": 0, "Sequence number": 10552539, "Fwd thread id": 1, "Ev Idx": 7297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940491825.349, "dur": 11.013, + "args": { + "External id": 989315,"Sequence number": 10552539, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7298 + } + }, + { + "ph": "f", "id": 380, "pid": 2338708, "tid": 2379421, "ts": 6345940491825.349, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940491828.181, "dur": 7.848, + "args": { + "External id": 989316,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940491829.699, "dur": 6.117, + "args": { + "External id": 989317,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940491843.274, "dur": 7.393, + "args": { + "External id": 989318,"Record function id": 0, "Sequence number": 10552538, "Fwd thread id": 1, "Ev Idx": 7301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940491844.461, "dur": 4.044, + "args": { + "External id": 989319,"Sequence number": 10552538, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 128]], "Ev Idx": 7302 + } + }, + { + "ph": "f", "id": 381, "pid": 2338708, "tid": 2379421, "ts": 6345940491844.461, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940491846.004, "dur": 2.315, + "args": { + "External id": 989320,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940491847.339, "dur": 0.865, + "args": { + "External id": 989321,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 128, 1], []], "Input Dims": [[8, 4096, 8, 128], []], "Ev Idx": 7304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940491854.556, "dur": 9.753, + "args": { + "External id": 989322,"Record function id": 0, "Sequence number": 10552537, "Fwd thread id": 1, "Ev Idx": 7305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940491855.589, "dur": 6.484, + "args": { + "External id": 989323,"Sequence number": 10552537, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 7306 + } + }, + { + "ph": "f", "id": 382, "pid": 2338708, "tid": 2379421, "ts": 6345940491855.589, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940491859.432, "dur": 2.472, + "args": { + "External id": 989324,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940491860.610, "dur": 1.168, + "args": { + "External id": 989325,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940491868.677, "dur": 7.322, + "args": { + "External id": 989326,"Record function id": 0, "Sequence number": 10552536, "Fwd thread id": 1, "Ev Idx": 7309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940491869.789, "dur": 3.848, + "args": { + "External id": 989327,"Sequence number": 10552536, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7310 + } + }, + { + "ph": "f", "id": 383, "pid": 2338708, "tid": 2379421, "ts": 6345940491869.789, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940491871.384, "dur": 2.080, + "args": { + "External id": 989328,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940491872.237, "dur": 1.114, + "args": { + "External id": 989329,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940491880.157, "dur": 244.313, + "args": { + "External id": 989330,"Record function id": 0, "Sequence number": 10552535, "Fwd thread id": 1, "Ev Idx": 7313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940491881.282, "dur": 231.852, + "args": { + "External id": 989331,"Sequence number": 10552535, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7314 + } + }, + { + "ph": "f", "id": 384, "pid": 2338708, "tid": 2379421, "ts": 6345940491881.282, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940491884.869, "dur": 8.988, + "args": { + "External id": 989332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940491889.252, "dur": 3.837, + "args": { + "External id": 989333,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940491891.434, "dur": 1.370, + "args": { + "External id": 989334,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940491895.756, "dur": 74.758, + "args": { + "External id": 989335,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940491971.835, "dur": 5.372, + "args": { + "External id": 989336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940491972.717, "dur": 3.587, + "args": { + "External id": 989337,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940491974.646, "dur": 1.467, + "args": { + "External id": 989338,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940491981.362, "dur": 4.066, + "args": { + "External id": 989339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940491982.518, "dur": 2.342, + "args": { + "External id": 989340,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940491984.273, "dur": 0.487, + "args": { + "External id": 989341,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940491986.338, "dur": 124.513, + "args": { + "External id": 989342,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492134.987, "dur": 11.335, + "args": { + "External id": 989343,"Record function id": 0, "Sequence number": 10552534, "Fwd thread id": 1, "Ev Idx": 7326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492136.728, "dur": 7.488, + "args": { + "External id": 989344,"Sequence number": 10552534, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7327 + } + }, + { + "ph": "f", "id": 385, "pid": 2338708, "tid": 2379421, "ts": 6345940492136.728, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940492139.541, "dur": 4.484, + "args": { + "External id": 989345,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940492141.388, "dur": 2.497, + "args": { + "External id": 989346,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492150.856, "dur": 10.964, + "args": { + "External id": 989347,"Record function id": 0, "Sequence number": 10552533, "Fwd thread id": 1, "Ev Idx": 7330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492154.081, "dur": 5.715, + "args": { + "External id": 989348,"Sequence number": 10552533, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7331 + } + }, + { + "ph": "f", "id": 386, "pid": 2338708, "tid": 2379421, "ts": 6345940492154.081, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940492155.187, "dur": 4.331, + "args": { + "External id": 989349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940492156.444, "dur": 2.517, + "args": { + "External id": 989350,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940492157.936, "dur": 0.903, + "args": { + "External id": 989351,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940492168.562, "dur": 12.023, + "args": { + "External id": 989352,"Record function id": 0, "Ev Idx": 7335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940492170.738, "dur": 8.834, + "args": { + "External id": 989353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940492174.068, "dur": 5.077, + "args": { + "External id": 989354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940492175.699, "dur": 3.290, + "args": { + "External id": 989355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492184.738, "dur": 9.907, + "args": { + "External id": 989356,"Record function id": 0, "Sequence number": 10552532, "Fwd thread id": 1, "Ev Idx": 7339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492185.805, "dur": 5.915, + "args": { + "External id": 989357,"Sequence number": 10552532, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4194304, 1024, 1]], "Input Dims": [[8, 4096, 1024]], "Ev Idx": 7340 + } + }, + { + "ph": "f", "id": 387, "pid": 2338708, "tid": 2379421, "ts": 6345940492185.805, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940492187.461, "dur": 4.075, + "args": { + "External id": 989358,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940492190.236, "dur": 1.170, + "args": { + "External id": 989359,"Record function id": 0, "Concrete Inputs": ["", "[32768, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 7342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492198.608, "dur": 114.596, + "args": { + "External id": 989360,"Record function id": 0, "Sequence number": 10552531, "Fwd thread id": 1, "Ev Idx": 7343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492199.620, "dur": 105.977, + "args": { + "External id": 989361,"Sequence number": 10552531, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7344 + } + }, + { + "ph": "f", "id": 388, "pid": 2338708, "tid": 2379421, "ts": 6345940492199.620, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940492202.369, "dur": 3.287, + "args": { + "External id": 989362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1024, 1]], "Input Dims": [[32768, 1024]], "Ev Idx": 7345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940492203.166, "dur": 1.941, + "args": { + "External id": 989363,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1024, 1], [], []], "Input Dims": [[32768, 1024], [], []], "Ev Idx": 7346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940492204.458, "dur": 0.517, + "args": { + "External id": 989364,"Record function id": 0, "Concrete Inputs": ["", "[1024, 32768]", "[1, 1024]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1024, 1], [], [], []], "Input Dims": [[32768, 1024], [], [], []], "Ev Idx": 7347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940492209.124, "dur": 37.635, + "args": { + "External id": 989365,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096]], "Ev Idx": 7348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940492248.079, "dur": 5.403, + "args": { + "External id": 989366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940492248.959, "dur": 3.775, + "args": { + "External id": 989367,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 7350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940492250.887, "dur": 1.706, + "args": { + "External id": 989368,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 7351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940492254.863, "dur": 9.116, + "args": { + "External id": 989369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940492256.309, "dur": 7.127, + "args": { + "External id": 989370,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940492260.455, "dur": 2.883, + "args": { + "External id": 989371,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940492264.661, "dur": 40.041, + "args": { + "External id": 989372,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096]], "Ev Idx": 7355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492319.065, "dur": 54.324, + "args": { + "External id": 989373,"Record function id": 0, "Sequence number": 10552530, "Fwd thread id": 1, "Ev Idx": 7356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492332.761, "dur": 8.908, + "args": { + "External id": 989374,"Sequence number": 10552530, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7357 + } + }, + { + "ph": "f", "id": 389, "pid": 2338708, "tid": 2379421, "ts": 6345940492332.761, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940492338.633, "dur": 2.858, + "args": { + "External id": 989375,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940492339.739, "dur": 1.600, + "args": { + "External id": 989376,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2379421, + "ts": 6345940492345.479, "dur": 24.656, + "args": { + "External id": 989377,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492378.387, "dur": 12.405, + "args": { + "External id": 989378,"Record function id": 0, "Sequence number": 10552529, "Fwd thread id": 1, "Ev Idx": 7361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492379.883, "dur": 8.561, + "args": { + "External id": 989379,"Sequence number": 10552529, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7362 + } + }, + { + "ph": "f", "id": 390, "pid": 2338708, "tid": 2379421, "ts": 6345940492379.883, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940492383.758, "dur": 4.443, + "args": { + "External id": 989380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 1024]], "Ev Idx": 7363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940492384.859, "dur": 2.711, + "args": { + "External id": 989381,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 1024], [], []], "Ev Idx": 7364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940492386.817, "dur": 0.622, + "args": { + "External id": 989382,"Record function id": 0, "Concrete Inputs": ["", "[1024, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 1024], [], [], []], "Ev Idx": 7365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940492395.666, "dur": 6.339, + "args": { + "External id": 989383,"Record function id": 0, "Ev Idx": 7366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940492397.219, "dur": 4.160, + "args": { + "External id": 989384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940492398.520, "dur": 2.497, + "args": { + "External id": 989385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940492399.302, "dur": 1.600, + "args": { + "External id": 989386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492406.079, "dur": 10.400, + "args": { + "External id": 989387,"Record function id": 0, "Sequence number": 10552528, "Fwd thread id": 1, "Ev Idx": 7370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "UnsafeViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492407.779, "dur": 6.203, + "args": { + "External id": 989388,"Sequence number": 10552528, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7371 + } + }, + { + "ph": "f", "id": 391, "pid": 2338708, "tid": 2379421, "ts": 6345940492407.779, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940492408.989, "dur": 4.804, + "args": { + "External id": 989389,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940492412.126, "dur": 1.537, + "args": { + "External id": 989390,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492420.366, "dur": 110.609, + "args": { + "External id": 989391,"Record function id": 0, "Sequence number": 10552527, "Fwd thread id": 1, "Ev Idx": 7374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "MmBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492421.426, "dur": 101.746, + "args": { + "External id": 989392,"Sequence number": 10552527, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7375 + } + }, + { + "ph": "f", "id": 392, "pid": 2338708, "tid": 2379421, "ts": 6345940492421.426, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940492423.691, "dur": 2.931, + "args": { + "External id": 989393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940492424.239, "dur": 1.874, + "args": { + "External id": 989394,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32768, 4096], [], []], "Ev Idx": 7377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940492425.495, "dur": 0.489, + "args": { + "External id": 989395,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32768]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 7378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940492427.545, "dur": 42.011, + "args": { + "External id": 989396,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096]], "Ev Idx": 7379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940492472.939, "dur": 4.105, + "args": { + "External id": 989397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940492473.947, "dur": 2.432, + "args": { + "External id": 989398,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940492475.369, "dur": 0.887, + "args": { + "External id": 989399,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940492478.679, "dur": 3.832, + "args": { + "External id": 989400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940492479.839, "dur": 2.209, + "args": { + "External id": 989401,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940492481.464, "dur": 0.505, + "args": { + "External id": 989402,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940492485.858, "dur": 36.432, + "args": { + "External id": 989403,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 7386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492536.111, "dur": 28.773, + "args": { + "External id": 989404,"Record function id": 0, "Sequence number": 10552526, "Fwd thread id": 1, "Ev Idx": 7387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492537.072, "dur": 5.335, + "args": { + "External id": 989405,"Sequence number": 10552526, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 7388 + } + }, + { + "ph": "f", "id": 393, "pid": 2338708, "tid": 2379421, "ts": 6345940492537.072, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940492539.165, "dur": 3.050, + "args": { + "External id": 989406,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940492540.509, "dur": 1.543, + "args": { + "External id": 989407,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940492545.202, "dur": 16.640, + "args": { + "External id": 989408,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492569.256, "dur": 10.536, + "args": { + "External id": 989409,"Record function id": 0, "Sequence number": 10552525, "Fwd thread id": 1, "Ev Idx": 7392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TBackward0", "pid": 2338708, "tid": 2379421, + "ts": 6345940492570.239, "dur": 7.627, + "args": { + "External id": 989410,"Sequence number": 10552525, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7393 + } + }, + { + "ph": "f", "id": 394, "pid": 2338708, "tid": 2379421, "ts": 6345940492570.239, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2379421, + "ts": 6345940492571.300, "dur": 6.309, + "args": { + "External id": 989411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1, 4096]], "Input Dims": [[4096, 4096]], "Ev Idx": 7394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2379421, + "ts": 6345940492572.301, "dur": 4.733, + "args": { + "External id": 989412,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[1, 4096], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 7395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940492576.309, "dur": 0.586, + "args": { + "External id": 989413,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 4096], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 7396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940492584.593, "dur": 5.249, + "args": { + "External id": 989414,"Record function id": 0, "Ev Idx": 7397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940492586.000, "dur": 3.309, + "args": { + "External id": 989415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940492586.861, "dur": 1.818, + "args": { + "External id": 989416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940492587.529, "dur": 1.025, + "args": { + "External id": 989417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940492594.834, "dur": 441.440, + "args": { + "External id": 989418,"Record function id": 0, "Sequence number": 10552524, "Fwd thread id": 1, "Ev Idx": 7401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940492596.249, "dur": 377.784, + "args": { + "External id": 989419,"Sequence number": 10552524, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7402 + } + }, + { + "ph": "f", "id": 395, "pid": 2338708, "tid": 2379421, "ts": 6345940492596.249, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940492636.080, "dur": 2.744, + "args": { + "External id": 989420,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940492637.030, "dur": 1.579, + "args": { + "External id": 989421,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 7404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940492657.992, "dur": 7.191, + "args": { + "External id": 989422,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940492675.646, "dur": 2.593, + "args": { + "External id": 989423,"Record function id": 0, "Concrete Inputs": ["[132, 4096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940492856.180, "dur": 2.124, + "args": { + "External id": 989424,"Record function id": 0, "Concrete Inputs": ["", "[1, -1, 4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[132, 4096], []], "Ev Idx": 7407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940492865.006, "dur": 40.013, + "args": { + "External id": 989425,"Record function id": 0, "Concrete Inputs": ["", "[1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[540672, 4096, 1], [], [], []], "Input Dims": [[1, 132, 4096], [], [], []], "Ev Idx": 7408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940492878.444, "dur": 0.978, + "args": { + "External id": 989426,"Record function id": 0, "Concrete Inputs": ["", "[1, 1, 4096]", "[4096, 0, 1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 7409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940492911.380, "dur": 37.598, + "args": { + "External id": 989427,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], []], "Ev Idx": 7410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940492913.450, "dur": 35.224, + "args": { + "External id": 989428,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[1, 4096], [], [], [], [], [], []], "Ev Idx": 7411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940492918.418, "dur": 5.017, + "args": { + "External id": 989429,"Record function id": 0, "Concrete Inputs": ["[1, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940492927.244, "dur": 20.818, + "args": { + "External id": 989430,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1, 4096], [1, 4096], []], "Ev Idx": 7413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2379421, + "ts": 6345940492953.620, "dur": 3.196, + "args": { + "External id": 989431,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1]], "Input Dims": [[1, 4096], [4096]], "Ev Idx": 7414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940492955.115, "dur": 1.506, + "args": { + "External id": 989432,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[1, 4096], []], "Ev Idx": 7415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940492963.681, "dur": 2.380, + "args": { + "External id": 989433,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940492964.737, "dur": 1.151, + "args": { + "External id": 989434,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 7417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940492990.481, "dur": 16.485, + "args": { + "External id": 989435,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 7418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940493050.724, "dur": 52.593, + "args": { + "External id": 989436,"Record function id": 0, "Ev Idx": 7419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940493092.701, "dur": 9.228, + "args": { + "External id": 989437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940493095.653, "dur": 4.488, + "args": { + "External id": 989438,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940493096.983, "dur": 2.851, + "args": { + "External id": 989439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940493110.061, "dur": 3142.596, + "args": { + "External id": 989440,"Record function id": 0, "Ev Idx": 7423 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2338708, "tid": 2379421, + "ts": 6345940493148.026, "dur": 1039.680, + "args": { + "External id": 989441,"Record function id": 0, "Ev Idx": 7424 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2338708, "tid": 2379421, + "ts": 6345940493176.851, "dur": 999.642, + "args": { + "External id": 989442,"Record function id": 0, "Ev Idx": 7425 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338708, "tid": 2379421, + "ts": 6345940493194.246, "dur": 963.565, + "args": { + "External id": 989443,"Record function id": 0, "Ev Idx": 7426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940493284.309, "dur": 7.909, + "args": { + "External id": 989444,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345940493310.296, "dur": 34.179, + "args": { + "External id": 989445,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493315.149, "dur": 1.631, + "args": { + "External id": 989446,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493319.342, "dur": 3.606, + "args": { + "External id": 989447,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493326.030, "dur": 0.389, + "args": { + "External id": 989448,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493327.794, "dur": 0.608, + "args": { + "External id": 989449,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493330.002, "dur": 0.361, + "args": { + "External id": 989450,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493333.495, "dur": 0.278, + "args": { + "External id": 989451,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493334.803, "dur": 0.293, + "args": { + "External id": 989452,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493336.392, "dur": 1.895, + "args": { + "External id": 989453,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493339.670, "dur": 0.275, + "args": { + "External id": 989454,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940493358.615, "dur": 53.075, + "args": { + "External id": 989455,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345940493452.092, "dur": 135.476, + "args": { + "External id": 989456,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940493464.669, "dur": 7.245, + "args": { + "External id": 989457,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345940493478.276, "dur": 12.014, + "args": { + "External id": 989458,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940493483.174, "dur": 6.682, + "args": { + "External id": 989459,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493487.469, "dur": 0.740, + "args": { + "External id": 989460,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345940493498.110, "dur": 30.274, + "args": { + "External id": 989461,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493500.619, "dur": 0.687, + "args": { + "External id": 989462,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493503.084, "dur": 2.551, + "args": { + "External id": 989463,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493506.929, "dur": 0.607, + "args": { + "External id": 989464,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493509.226, "dur": 0.433, + "args": { + "External id": 989465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493512.746, "dur": 0.363, + "args": { + "External id": 989466,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493514.696, "dur": 0.284, + "args": { + "External id": 989467,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493515.838, "dur": 2.180, + "args": { + "External id": 989468,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493520.938, "dur": 0.334, + "args": { + "External id": 989469,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940493522.204, "dur": 0.446, + "args": { + "External id": 989470,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940493542.588, "dur": 36.143, + "args": { + "External id": 989471,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345940493652.456, "dur": 323.277, + "args": { + "External id": 989472,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940493688.327, "dur": 282.249, + "args": { + "External id": 989473,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7456, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345940493699.569, "dur": 265.135, + "args": { + "External id": 989474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940494001.216, "dur": 2.739, + "args": { + "External id": 989475,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7458, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940494196.370, "dur": 2033.041, + "args": { + "External id": 989476,"Sequence number": 10552523, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7459 + } + }, + { + "ph": "f", "id": 396, "pid": 2338708, "tid": 2379421, "ts": 6345940494196.370, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940494335.510, "dur": 118.426, + "args": { + "External id": 989477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345940494504.016, "dur": 46.164, + "args": { + "External id": 989478,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345940494573.157, "dur": 56.687, + "args": { + "External id": 989479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940494641.291, "dur": 36.963, + "args": { + "External id": 989480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940494685.752, "dur": 49.129, + "args": { + "External id": 989481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940494745.566, "dur": 32.099, + "args": { + "External id": 989482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940494785.774, "dur": 32.456, + "args": { + "External id": 989483,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345940494848.632, "dur": 27.094, + "args": { + "External id": 989484,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345940494898.598, "dur": 33.662, + "args": { + "External id": 989485,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345940494961.144, "dur": 22.756, + "args": { + "External id": 989486,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345940495000.160, "dur": 40.689, + "args": { + "External id": 989487,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940495098.862, "dur": 52.024, + "args": { + "External id": 989488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940495156.369, "dur": 38.312, + "args": { + "External id": 989489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940495231.030, "dur": 290.294, + "args": { + "External id": 989490,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940495329.724, "dur": 8.385, + "args": { + "External id": 989491,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940495340.282, "dur": 6.544, + "args": { + "External id": 989492,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940495348.312, "dur": 2.149, + "args": { + "External id": 989493,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940495351.908, "dur": 2.382, + "args": { + "External id": 989494,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940495401.590, "dur": 5.769, + "args": { + "External id": 989495,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940495403.576, "dur": 3.568, + "args": { + "External id": 989496,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940495409.288, "dur": 35.944, + "args": { + "External id": 989497,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940495415.906, "dur": 1.849, + "args": { + "External id": 989498,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940495449.698, "dur": 1.832, + "args": { + "External id": 989499,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940495450.791, "dur": 0.648, + "args": { + "External id": 989500,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940495452.495, "dur": 20.740, + "args": { + "External id": 989501,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940495455.395, "dur": 2.472, + "args": { + "External id": 989502,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345940495563.819, "dur": 32.923, + "args": { + "External id": 989503,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345940495619.884, "dur": 19.083, + "args": { + "External id": 989504,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940495647.600, "dur": 41.424, + "args": { + "External id": 989505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940495698.683, "dur": 41.012, + "args": { + "External id": 989506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940495749.731, "dur": 23.810, + "args": { + "External id": 989507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940495779.908, "dur": 34.748, + "args": { + "External id": 989508,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940495822.356, "dur": 32.021, + "args": { + "External id": 989509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940495863.756, "dur": 33.398, + "args": { + "External id": 989510,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345940495921.606, "dur": 29.181, + "args": { + "External id": 989511,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345940495972.235, "dur": 30.806, + "args": { + "External id": 989512,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345940496046.153, "dur": 61.509, + "args": { + "External id": 989513,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345940496133.479, "dur": 18.358, + "args": { + "External id": 989514,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345940496168.941, "dur": 23.829, + "args": { + "External id": 989515,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496278.439, "dur": 19.367, + "args": { + "External id": 989516,"Record function id": 0, "Ev Idx": 7499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496282.070, "dur": 14.785, + "args": { + "External id": 989517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496287.148, "dur": 8.525, + "args": { + "External id": 989518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496288.877, "dur": 6.698, + "args": { + "External id": 989519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496302.344, "dur": 5.680, + "args": { + "External id": 989520,"Record function id": 0, "Ev Idx": 7503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496304.275, "dur": 3.216, + "args": { + "External id": 989521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496305.203, "dur": 1.672, + "args": { + "External id": 989522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496305.828, "dur": 0.933, + "args": { + "External id": 989523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496311.911, "dur": 4.800, + "args": { + "External id": 989524,"Record function id": 0, "Ev Idx": 7507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496313.359, "dur": 2.785, + "args": { + "External id": 989525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496313.991, "dur": 1.631, + "args": { + "External id": 989526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496314.689, "dur": 0.829, + "args": { + "External id": 989527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496320.445, "dur": 4.366, + "args": { + "External id": 989528,"Record function id": 0, "Ev Idx": 7511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496321.724, "dur": 2.590, + "args": { + "External id": 989529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496322.306, "dur": 1.453, + "args": { + "External id": 989530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496322.994, "dur": 0.675, + "args": { + "External id": 989531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496328.439, "dur": 4.218, + "args": { + "External id": 989532,"Record function id": 0, "Ev Idx": 7515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496329.864, "dur": 2.300, + "args": { + "External id": 989533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496330.488, "dur": 1.182, + "args": { + "External id": 989534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496330.826, "dur": 0.756, + "args": { + "External id": 989535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496336.332, "dur": 6.778, + "args": { + "External id": 989536,"Record function id": 0, "Ev Idx": 7519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496337.824, "dur": 4.754, + "args": { + "External id": 989537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496338.440, "dur": 3.604, + "args": { + "External id": 989538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496341.182, "dur": 0.771, + "args": { + "External id": 989539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496346.957, "dur": 4.479, + "args": { + "External id": 989540,"Record function id": 0, "Ev Idx": 7523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496348.404, "dur": 2.564, + "args": { + "External id": 989541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496348.974, "dur": 1.450, + "args": { + "External id": 989542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496349.490, "dur": 0.847, + "args": { + "External id": 989543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496355.106, "dur": 7.072, + "args": { + "External id": 989544,"Record function id": 0, "Ev Idx": 7527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496356.711, "dur": 4.949, + "args": { + "External id": 989545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496357.431, "dur": 3.658, + "args": { + "External id": 989546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496357.768, "dur": 3.238, + "args": { + "External id": 989547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496365.793, "dur": 4.614, + "args": { + "External id": 989548,"Record function id": 0, "Ev Idx": 7531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940496367.390, "dur": 2.528, + "args": { + "External id": 989549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496367.913, "dur": 1.520, + "args": { + "External id": 989550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940496368.306, "dur": 1.052, + "args": { + "External id": 989551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940496374.735, "dur": 274137.197, + "args": { + "External id": 989552,"Record function id": 0, "Sequence number": 10552522, "Fwd thread id": 1, "Ev Idx": 7535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940496376.160, "dur": 274125.150, + "args": { + "External id": 989553,"Sequence number": 10552522, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7536 + } + }, + { + "ph": "f", "id": 397, "pid": 2338708, "tid": 2379421, "ts": 6345940496376.160, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2338708, "tid": 2379421, + "ts": 6345940496413.547, "dur": 48.601, + "args": { + "External id": 989554,"Record function id": 0, "Ev Idx": 7537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2338708, "tid": 2379421, + "ts": 6345940496471.864, "dur": 75.927, + "args": { + "External id": 989555,"Record function id": 0, "Ev Idx": 7538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2338708, "tid": 2379421, + "ts": 6345940496555.621, "dur": 273936.342, + "args": { + "External id": 989556,"Record function id": 0, "Ev Idx": 7539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940496620.806, "dur": 8.419, + "args": { + "External id": 989557,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940496643.079, "dur": 5.456, + "args": { + "External id": 989558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345940496668.526, "dur": 272660.964, + "args": { + "External id": 989559,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345940496685.785, "dur": 272628.581, + "args": { + "External id": 989560,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940496850.497, "dur": 5.424, + "args": { + "External id": 989561,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940496883.116, "dur": 272374.251, + "args": { + "External id": 989562,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940496886.483, "dur": 272369.010, + "args": { + "External id": 989563,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940496891.104, "dur": 10.994, + "args": { + "External id": 989564,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940496904.861, "dur": 272344.305, + "args": { + "External id": 989565,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940769460.445, "dur": 17.323, + "args": { + "External id": 989566,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940769466.700, "dur": 10.619, + "args": { + "External id": 989567,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345940769519.374, "dur": 459.218, + "args": { + "External id": 989568,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940769556.772, "dur": 415.621, + "args": { + "External id": 989569,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7552, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345940769571.718, "dur": 393.997, + "args": { + "External id": 989570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940770004.248, "dur": 2.943, + "args": { + "External id": 989571,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7554, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940770141.876, "dur": 8.634, + "args": { + "External id": 989572,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940770165.470, "dur": 46.136, + "args": { + "External id": 989573,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940770224.227, "dur": 1.962, + "args": { + "External id": 989574,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940770233.181, "dur": 14.443, + "args": { + "External id": 989575,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940770254.884, "dur": 3.365, + "args": { + "External id": 989576,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940770263.759, "dur": 12.572, + "args": { + "External id": 989577,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940770282.195, "dur": 1.288, + "args": { + "External id": 989578,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940770288.603, "dur": 12.230, + "args": { + "External id": 989579,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940770308.666, "dur": 1.206, + "args": { + "External id": 989580,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940770314.543, "dur": 12.663, + "args": { + "External id": 989581,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940770332.792, "dur": 1.653, + "args": { + "External id": 989582,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940770339.811, "dur": 12.316, + "args": { + "External id": 989583,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940770357.050, "dur": 1.053, + "args": { + "External id": 989584,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940770362.190, "dur": 13.034, + "args": { + "External id": 989585,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940770381.911, "dur": 0.964, + "args": { + "External id": 989586,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940770388.348, "dur": 11.976, + "args": { + "External id": 989587,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940770405.427, "dur": 1.077, + "args": { + "External id": 989588,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940770410.795, "dur": 12.168, + "args": { + "External id": 989589,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940770530.920, "dur": 3347.252, + "args": { + "External id": 989590,"Record function id": 0, "Ev Idx": 7573 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2338708, "tid": 2379421, + "ts": 6345940770554.927, "dur": 1224.438, + "args": { + "External id": 989591,"Record function id": 0, "Ev Idx": 7574 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338708, "tid": 2379421, + "ts": 6345940770573.495, "dur": 367.862, + "args": { + "External id": 989592,"Record function id": 0, "Ev Idx": 7575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940770668.119, "dur": 4.539, + "args": { + "External id": 989593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940770676.793, "dur": 3.473, + "args": { + "External id": 989594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940770682.304, "dur": 1.201, + "args": { + "External id": 989595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940770685.499, "dur": 0.942, + "args": { + "External id": 989596,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940770690.678, "dur": 1.502, + "args": { + "External id": 989597,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940770693.955, "dur": 0.953, + "args": { + "External id": 989598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940770697.131, "dur": 1.326, + "args": { + "External id": 989599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940770706.551, "dur": 1.371, + "args": { + "External id": 989600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940770712.130, "dur": 1.090, + "args": { + "External id": 989601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940770714.673, "dur": 3.431, + "args": { + "External id": 989602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940770738.621, "dur": 167.581, + "args": { + "External id": 989603,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940770757.051, "dur": 144.014, + "args": { + "External id": 989604,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940770777.302, "dur": 14.932, + "args": { + "External id": 989605,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940770796.178, "dur": 74.719, + "args": { + "External id": 989606,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940770799.201, "dur": 71.269, + "args": { + "External id": 989607,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940770803.891, "dur": 7.222, + "args": { + "External id": 989608,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940770814.995, "dur": 54.800, + "args": { + "External id": 989609,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7592 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2338708, "tid": 2379421, + "ts": 6345940771093.489, "dur": 676.412, + "args": { + "External id": 989610,"Record function id": 0, "Ev Idx": 7593 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338708, "tid": 2379421, + "ts": 6345940771118.797, "dur": 635.235, + "args": { + "External id": 989611,"Record function id": 0, "Ev Idx": 7594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940771195.351, "dur": 7.508, + "args": { + "External id": 989612,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345940771221.618, "dur": 35.475, + "args": { + "External id": 989613,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771227.435, "dur": 2.128, + "args": { + "External id": 989614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771232.012, "dur": 1.914, + "args": { + "External id": 989615,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771235.416, "dur": 0.576, + "args": { + "External id": 989616,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771237.755, "dur": 0.443, + "args": { + "External id": 989617,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771241.243, "dur": 2.840, + "args": { + "External id": 989618,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771245.423, "dur": 0.348, + "args": { + "External id": 989619,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771247.363, "dur": 0.518, + "args": { + "External id": 989620,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771250.405, "dur": 0.435, + "args": { + "External id": 989621,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771252.491, "dur": 0.323, + "args": { + "External id": 989622,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940771268.887, "dur": 53.963, + "args": { + "External id": 989623,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345940771358.254, "dur": 130.186, + "args": { + "External id": 989624,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940771370.118, "dur": 4.022, + "args": { + "External id": 989625,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345940771380.846, "dur": 11.691, + "args": { + "External id": 989626,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940771385.878, "dur": 6.208, + "args": { + "External id": 989627,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771389.756, "dur": 0.804, + "args": { + "External id": 989628,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345940771399.824, "dur": 29.451, + "args": { + "External id": 989629,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771402.149, "dur": 0.458, + "args": { + "External id": 989630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771405.414, "dur": 3.072, + "args": { + "External id": 989631,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771409.928, "dur": 0.614, + "args": { + "External id": 989632,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771412.002, "dur": 1.826, + "args": { + "External id": 989633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771414.853, "dur": 0.418, + "args": { + "External id": 989634,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771416.725, "dur": 0.790, + "args": { + "External id": 989635,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771419.979, "dur": 0.381, + "args": { + "External id": 989636,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771421.608, "dur": 0.470, + "args": { + "External id": 989637,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940771423.253, "dur": 0.611, + "args": { + "External id": 989638,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940771441.444, "dur": 36.567, + "args": { + "External id": 989639,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345940771539.093, "dur": 133.903, + "args": { + "External id": 989640,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940771571.773, "dur": 97.443, + "args": { + "External id": 989641,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7624, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345940771584.188, "dur": 79.592, + "args": { + "External id": 989642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940771694.440, "dur": 2.062, + "args": { + "External id": 989643,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7626, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940771787.274, "dur": 2066.833, + "args": { + "External id": 989644,"Sequence number": 10552521, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7627 + } + }, + { + "ph": "f", "id": 398, "pid": 2338708, "tid": 2379421, "ts": 6345940771787.274, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940771908.843, "dur": 141.346, + "args": { + "External id": 989645,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345940772145.816, "dur": 47.602, + "args": { + "External id": 989646,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345940772217.275, "dur": 66.560, + "args": { + "External id": 989647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940772297.371, "dur": 38.679, + "args": { + "External id": 989648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940772344.056, "dur": 39.279, + "args": { + "External id": 989649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940772391.676, "dur": 32.780, + "args": { + "External id": 989650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940772435.196, "dur": 34.729, + "args": { + "External id": 989651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345940772501.194, "dur": 29.575, + "args": { + "External id": 989652,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345940772552.752, "dur": 36.671, + "args": { + "External id": 989653,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345940772617.193, "dur": 23.073, + "args": { + "External id": 989654,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345940772658.781, "dur": 17.962, + "args": { + "External id": 989655,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940772687.947, "dur": 42.532, + "args": { + "External id": 989656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940772734.870, "dur": 38.285, + "args": { + "External id": 989657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940772806.149, "dur": 392.460, + "args": { + "External id": 989658,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940772899.957, "dur": 7.560, + "args": { + "External id": 989659,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940772910.017, "dur": 3.224, + "args": { + "External id": 989660,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940772914.171, "dur": 3.040, + "args": { + "External id": 989661,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940772918.601, "dur": 5.366, + "args": { + "External id": 989662,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940772992.791, "dur": 9.300, + "args": { + "External id": 989663,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940772997.884, "dur": 3.760, + "args": { + "External id": 989664,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940773004.149, "dur": 98.526, + "args": { + "External id": 989665,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940773032.726, "dur": 2.705, + "args": { + "External id": 989666,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940773106.071, "dur": 2.430, + "args": { + "External id": 989667,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940773107.106, "dur": 1.275, + "args": { + "External id": 989668,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940773109.577, "dur": 22.063, + "args": { + "External id": 989669,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940773112.434, "dur": 0.810, + "args": { + "External id": 989670,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345940773246.020, "dur": 32.694, + "args": { + "External id": 989671,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345940773304.055, "dur": 18.280, + "args": { + "External id": 989672,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940773332.072, "dur": 57.019, + "args": { + "External id": 989673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940773397.759, "dur": 47.123, + "args": { + "External id": 989674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940773456.595, "dur": 25.146, + "args": { + "External id": 989675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940773487.760, "dur": 35.145, + "args": { + "External id": 989676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940773530.373, "dur": 31.633, + "args": { + "External id": 989677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940773569.244, "dur": 34.817, + "args": { + "External id": 989678,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345940773628.813, "dur": 28.330, + "args": { + "External id": 989679,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345940773676.663, "dur": 27.216, + "args": { + "External id": 989680,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345940773725.629, "dur": 18.785, + "args": { + "External id": 989681,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345940773759.927, "dur": 15.434, + "args": { + "External id": 989682,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345940773801.679, "dur": 19.134, + "args": { + "External id": 989683,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940773903.032, "dur": 16.709, + "args": { + "External id": 989684,"Record function id": 0, "Ev Idx": 7667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940773906.446, "dur": 12.294, + "args": { + "External id": 989685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940773911.321, "dur": 6.214, + "args": { + "External id": 989686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940773913.047, "dur": 4.372, + "args": { + "External id": 989687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940773924.425, "dur": 5.230, + "args": { + "External id": 989688,"Record function id": 0, "Ev Idx": 7671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940773926.045, "dur": 3.076, + "args": { + "External id": 989689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940773926.742, "dur": 1.799, + "args": { + "External id": 989690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940773927.666, "dur": 0.788, + "args": { + "External id": 989691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940773933.595, "dur": 4.666, + "args": { + "External id": 989692,"Record function id": 0, "Ev Idx": 7675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940773934.759, "dur": 2.995, + "args": { + "External id": 989693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940773935.563, "dur": 1.597, + "args": { + "External id": 989694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940773936.224, "dur": 0.812, + "args": { + "External id": 989695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940773942.121, "dur": 46.453, + "args": { + "External id": 989696,"Record function id": 0, "Ev Idx": 7679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940773982.618, "dur": 5.343, + "args": { + "External id": 989697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940773983.492, "dur": 3.827, + "args": { + "External id": 989698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940773983.847, "dur": 3.375, + "args": { + "External id": 989699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940773992.416, "dur": 4.880, + "args": { + "External id": 989700,"Record function id": 0, "Ev Idx": 7683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940773994.140, "dur": 2.655, + "args": { + "External id": 989701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940773994.736, "dur": 1.277, + "args": { + "External id": 989702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940773995.085, "dur": 0.813, + "args": { + "External id": 989703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940774000.956, "dur": 23.171, + "args": { + "External id": 989704,"Record function id": 0, "Ev Idx": 7687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940774002.253, "dur": 4.588, + "args": { + "External id": 989705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940774002.822, "dur": 3.361, + "args": { + "External id": 989706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940774005.333, "dur": 0.730, + "args": { + "External id": 989707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940774031.930, "dur": 7.159, + "args": { + "External id": 989708,"Record function id": 0, "Ev Idx": 7691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940774034.048, "dur": 4.243, + "args": { + "External id": 989709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940774035.236, "dur": 2.120, + "args": { + "External id": 989710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940774035.736, "dur": 1.478, + "args": { + "External id": 989711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940774043.419, "dur": 4.293, + "args": { + "External id": 989712,"Record function id": 0, "Ev Idx": 7695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940774044.707, "dur": 2.494, + "args": { + "External id": 989713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940774045.363, "dur": 1.350, + "args": { + "External id": 989714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940774045.907, "dur": 0.716, + "args": { + "External id": 989715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940774087.637, "dur": 9.550, + "args": { + "External id": 989716,"Record function id": 0, "Ev Idx": 7699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940774091.055, "dur": 5.247, + "args": { + "External id": 989717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940774092.323, "dur": 2.838, + "args": { + "External id": 989718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940774093.086, "dur": 1.808, + "args": { + "External id": 989719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940774104.385, "dur": 81336.972, + "args": { + "External id": 989720,"Record function id": 0, "Sequence number": 10552520, "Fwd thread id": 1, "Ev Idx": 7703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940774105.785, "dur": 81323.994, + "args": { + "External id": 989721,"Sequence number": 10552520, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7704 + } + }, + { + "ph": "f", "id": 399, "pid": 2338708, "tid": 2379421, "ts": 6345940774105.785, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2338708, "tid": 2379421, + "ts": 6345940774142.441, "dur": 46.537, + "args": { + "External id": 989722,"Record function id": 0, "Ev Idx": 7705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2338708, "tid": 2379421, + "ts": 6345940774198.749, "dur": 78.106, + "args": { + "External id": 989723,"Record function id": 0, "Ev Idx": 7706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2338708, "tid": 2379421, + "ts": 6345940774284.502, "dur": 81135.875, + "args": { + "External id": 989724,"Record function id": 0, "Ev Idx": 7707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940774393.850, "dur": 9.319, + "args": { + "External id": 989725,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940774414.565, "dur": 5.447, + "args": { + "External id": 989726,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345940774439.068, "dur": 79899.219, + "args": { + "External id": 989727,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345940774455.322, "dur": 79868.716, + "args": { + "External id": 989728,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940774600.487, "dur": 19.673, + "args": { + "External id": 989729,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940774645.448, "dur": 79630.062, + "args": { + "External id": 989730,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940774648.846, "dur": 79625.465, + "args": { + "External id": 989731,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940774653.608, "dur": 13.279, + "args": { + "External id": 989732,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940774669.592, "dur": 79598.783, + "args": { + "External id": 989733,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940854461.797, "dur": 13.453, + "args": { + "External id": 989734,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940854465.711, "dur": 9.095, + "args": { + "External id": 989735,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345940854508.608, "dur": 404.466, + "args": { + "External id": 989736,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940854545.507, "dur": 361.954, + "args": { + "External id": 989737,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7720, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345940854563.142, "dur": 338.302, + "args": { + "External id": 989738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940854937.128, "dur": 2.657, + "args": { + "External id": 989739,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7722, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940855005.174, "dur": 26.810, + "args": { + "External id": 989740,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940855048.326, "dur": 76.265, + "args": { + "External id": 989741,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940855141.716, "dur": 3.248, + "args": { + "External id": 989742,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940855152.269, "dur": 14.353, + "args": { + "External id": 989743,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940855173.999, "dur": 1.525, + "args": { + "External id": 989744,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940855183.082, "dur": 12.836, + "args": { + "External id": 989745,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940855201.329, "dur": 1.330, + "args": { + "External id": 989746,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940855207.523, "dur": 11.771, + "args": { + "External id": 989747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940855224.630, "dur": 1.176, + "args": { + "External id": 989748,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940855230.090, "dur": 12.756, + "args": { + "External id": 989749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940855248.143, "dur": 1.599, + "args": { + "External id": 989750,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940855254.194, "dur": 11.630, + "args": { + "External id": 989751,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940855273.069, "dur": 3.523, + "args": { + "External id": 989752,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940855287.449, "dur": 12.921, + "args": { + "External id": 989753,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940855306.952, "dur": 1.250, + "args": { + "External id": 989754,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940855313.051, "dur": 10.958, + "args": { + "External id": 989755,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940855329.224, "dur": 0.925, + "args": { + "External id": 989756,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940855337.029, "dur": 12.067, + "args": { + "External id": 989757,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940855460.369, "dur": 3340.415, + "args": { + "External id": 989758,"Record function id": 0, "Ev Idx": 7741 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2338708, "tid": 2379421, + "ts": 6345940855483.114, "dur": 1217.509, + "args": { + "External id": 989759,"Record function id": 0, "Ev Idx": 7742 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338708, "tid": 2379421, + "ts": 6345940855500.613, "dur": 360.785, + "args": { + "External id": 989760,"Record function id": 0, "Ev Idx": 7743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940855599.220, "dur": 5.933, + "args": { + "External id": 989761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940855609.099, "dur": 1.009, + "args": { + "External id": 989762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940855613.049, "dur": 0.883, + "args": { + "External id": 989763,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940855615.993, "dur": 0.923, + "args": { + "External id": 989764,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940855619.044, "dur": 0.825, + "args": { + "External id": 989765,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940855621.599, "dur": 3.636, + "args": { + "External id": 989766,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940855627.672, "dur": 1.439, + "args": { + "External id": 989767,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940855632.724, "dur": 1.284, + "args": { + "External id": 989768,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940855636.208, "dur": 1.246, + "args": { + "External id": 989769,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940855639.105, "dur": 0.966, + "args": { + "External id": 989770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940855660.763, "dur": 168.132, + "args": { + "External id": 989771,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940855679.939, "dur": 143.052, + "args": { + "External id": 989772,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940855701.347, "dur": 15.662, + "args": { + "External id": 989773,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940855720.927, "dur": 74.956, + "args": { + "External id": 989774,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940855724.289, "dur": 71.205, + "args": { + "External id": 989775,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940855729.507, "dur": 7.107, + "args": { + "External id": 989776,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940855738.940, "dur": 55.653, + "args": { + "External id": 989777,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7760 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2338708, "tid": 2379421, + "ts": 6345940855962.409, "dur": 729.976, + "args": { + "External id": 989778,"Record function id": 0, "Ev Idx": 7761 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338708, "tid": 2379421, + "ts": 6345940855980.694, "dur": 695.838, + "args": { + "External id": 989779,"Record function id": 0, "Ev Idx": 7762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940856108.994, "dur": 8.234, + "args": { + "External id": 989780,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345940856136.593, "dur": 40.359, + "args": { + "External id": 989781,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856142.555, "dur": 5.891, + "args": { + "External id": 989782,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856150.882, "dur": 0.615, + "args": { + "External id": 989783,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856153.092, "dur": 0.633, + "args": { + "External id": 989784,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856157.002, "dur": 0.441, + "args": { + "External id": 989785,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856159.267, "dur": 0.557, + "args": { + "External id": 989786,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856161.427, "dur": 0.392, + "args": { + "External id": 989787,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856165.449, "dur": 0.485, + "args": { + "External id": 989788,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856167.459, "dur": 0.335, + "args": { + "External id": 989789,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856169.797, "dur": 2.855, + "args": { + "External id": 989790,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940856190.199, "dur": 53.233, + "args": { + "External id": 989791,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345940856280.565, "dur": 126.444, + "args": { + "External id": 989792,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940856292.110, "dur": 4.924, + "args": { + "External id": 989793,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345940856303.035, "dur": 11.521, + "args": { + "External id": 989794,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940856308.027, "dur": 6.072, + "args": { + "External id": 989795,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856311.881, "dur": 0.639, + "args": { + "External id": 989796,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345940856322.305, "dur": 30.148, + "args": { + "External id": 989797,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856324.493, "dur": 0.637, + "args": { + "External id": 989798,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856327.267, "dur": 0.459, + "args": { + "External id": 989799,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856329.223, "dur": 2.137, + "args": { + "External id": 989800,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856333.377, "dur": 0.734, + "args": { + "External id": 989801,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856335.107, "dur": 0.502, + "args": { + "External id": 989802,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856338.605, "dur": 2.568, + "args": { + "External id": 989803,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856342.148, "dur": 0.317, + "args": { + "External id": 989804,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856344.379, "dur": 0.429, + "args": { + "External id": 989805,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940856347.204, "dur": 0.418, + "args": { + "External id": 989806,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940856365.184, "dur": 33.136, + "args": { + "External id": 989807,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345940856461.236, "dur": 132.411, + "args": { + "External id": 989808,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940856494.123, "dur": 95.603, + "args": { + "External id": 989809,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7792, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345940856506.968, "dur": 78.081, + "args": { + "External id": 989810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940856614.296, "dur": 2.053, + "args": { + "External id": 989811,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7794, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940856709.340, "dur": 2066.846, + "args": { + "External id": 989812,"Sequence number": 10552519, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7795 + } + }, + { + "ph": "f", "id": 400, "pid": 2338708, "tid": 2379421, "ts": 6345940856709.340, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940856835.334, "dur": 117.234, + "args": { + "External id": 989813,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345940857003.950, "dur": 109.158, + "args": { + "External id": 989814,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345940857140.354, "dur": 66.308, + "args": { + "External id": 989815,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940857219.660, "dur": 36.821, + "args": { + "External id": 989816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940857264.377, "dur": 38.766, + "args": { + "External id": 989817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940857313.704, "dur": 36.649, + "args": { + "External id": 989818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940857359.665, "dur": 34.798, + "args": { + "External id": 989819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345940857426.516, "dur": 30.142, + "args": { + "External id": 989820,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345940857478.430, "dur": 33.206, + "args": { + "External id": 989821,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345940857537.908, "dur": 21.813, + "args": { + "External id": 989822,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345940857580.270, "dur": 17.993, + "args": { + "External id": 989823,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940857608.117, "dur": 42.681, + "args": { + "External id": 989824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940857655.298, "dur": 37.445, + "args": { + "External id": 989825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940857726.668, "dur": 374.610, + "args": { + "External id": 989826,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940857816.866, "dur": 10.138, + "args": { + "External id": 989827,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940857829.196, "dur": 3.319, + "args": { + "External id": 989828,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940857833.792, "dur": 3.042, + "args": { + "External id": 989829,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940857837.889, "dur": 4.044, + "args": { + "External id": 989830,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940857908.852, "dur": 6.725, + "args": { + "External id": 989831,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940857911.425, "dur": 3.828, + "args": { + "External id": 989832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940857920.262, "dur": 38.072, + "args": { + "External id": 989833,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940857927.733, "dur": 2.152, + "args": { + "External id": 989834,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940857960.365, "dur": 1.673, + "args": { + "External id": 989835,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940857961.377, "dur": 0.571, + "args": { + "External id": 989836,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940857963.299, "dur": 17.132, + "args": { + "External id": 989837,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940857965.596, "dur": 0.450, + "args": { + "External id": 989838,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345940858155.542, "dur": 36.008, + "args": { + "External id": 989839,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345940858216.185, "dur": 20.296, + "args": { + "External id": 989840,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940858246.188, "dur": 58.017, + "args": { + "External id": 989841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940858312.577, "dur": 46.858, + "args": { + "External id": 989842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940858371.389, "dur": 24.617, + "args": { + "External id": 989843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940858402.659, "dur": 35.533, + "args": { + "External id": 989844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940858446.408, "dur": 33.083, + "args": { + "External id": 989845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940858487.759, "dur": 34.258, + "args": { + "External id": 989846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345940858550.090, "dur": 28.530, + "args": { + "External id": 989847,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345940858598.674, "dur": 29.962, + "args": { + "External id": 989848,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345940858649.221, "dur": 18.803, + "args": { + "External id": 989849,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345940858687.256, "dur": 17.196, + "args": { + "External id": 989850,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345940858721.871, "dur": 19.360, + "args": { + "External id": 989851,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 7834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858825.519, "dur": 17.121, + "args": { + "External id": 989852,"Record function id": 0, "Ev Idx": 7835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858829.549, "dur": 12.068, + "args": { + "External id": 989853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858834.354, "dur": 6.166, + "args": { + "External id": 989854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858836.144, "dur": 4.237, + "args": { + "External id": 989855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858847.277, "dur": 7.882, + "args": { + "External id": 989856,"Record function id": 0, "Ev Idx": 7839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858849.051, "dur": 5.564, + "args": { + "External id": 989857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858850.013, "dur": 4.073, + "args": { + "External id": 989858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858850.483, "dur": 3.502, + "args": { + "External id": 989859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858859.200, "dur": 5.014, + "args": { + "External id": 989860,"Record function id": 0, "Ev Idx": 7843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858860.467, "dur": 3.256, + "args": { + "External id": 989861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858861.179, "dur": 2.062, + "args": { + "External id": 989862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858862.066, "dur": 1.060, + "args": { + "External id": 989863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 7846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858868.143, "dur": 5.342, + "args": { + "External id": 989864,"Record function id": 0, "Ev Idx": 7847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858869.972, "dur": 3.045, + "args": { + "External id": 989865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858870.959, "dur": 1.556, + "args": { + "External id": 989866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858871.630, "dur": 0.796, + "args": { + "External id": 989867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 7850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858877.225, "dur": 4.262, + "args": { + "External id": 989868,"Record function id": 0, "Ev Idx": 7851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858878.629, "dur": 2.397, + "args": { + "External id": 989869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858879.222, "dur": 1.188, + "args": { + "External id": 989870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858879.540, "dur": 0.790, + "args": { + "External id": 989871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858885.146, "dur": 4.521, + "args": { + "External id": 989872,"Record function id": 0, "Ev Idx": 7855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858886.608, "dur": 2.585, + "args": { + "External id": 989873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858887.201, "dur": 1.492, + "args": { + "External id": 989874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858887.894, "dur": 0.712, + "args": { + "External id": 989875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 7858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858893.430, "dur": 4.240, + "args": { + "External id": 989876,"Record function id": 0, "Ev Idx": 7859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858894.785, "dur": 2.411, + "args": { + "External id": 989877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858895.354, "dur": 1.341, + "args": { + "External id": 989878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858895.726, "dur": 0.880, + "args": { + "External id": 989879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858901.310, "dur": 6.811, + "args": { + "External id": 989880,"Record function id": 0, "Ev Idx": 7863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858902.690, "dur": 4.967, + "args": { + "External id": 989881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858903.267, "dur": 3.878, + "args": { + "External id": 989882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858906.309, "dur": 0.703, + "args": { + "External id": 989883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 7866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858911.747, "dur": 5.703, + "args": { + "External id": 989884,"Record function id": 0, "Ev Idx": 7867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940858912.779, "dur": 4.195, + "args": { + "External id": 989885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858913.302, "dur": 3.131, + "args": { + "External id": 989886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940858913.670, "dur": 2.676, + "args": { + "External id": 989887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 7870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940858922.439, "dur": 75757.915, + "args": { + "External id": 989888,"Record function id": 0, "Sequence number": 10552518, "Fwd thread id": 1, "Ev Idx": 7871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940858924.193, "dur": 75745.044, + "args": { + "External id": 989889,"Sequence number": 10552518, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7872 + } + }, + { + "ph": "f", "id": 401, "pid": 2338708, "tid": 2379421, "ts": 6345940858924.193, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2338708, "tid": 2379421, + "ts": 6345940858956.886, "dur": 45.102, + "args": { + "External id": 989890,"Record function id": 0, "Ev Idx": 7873 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2338708, "tid": 2379421, + "ts": 6345940859031.095, "dur": 120.507, + "args": { + "External id": 989891,"Record function id": 0, "Ev Idx": 7874 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2338708, "tid": 2379421, + "ts": 6345940859160.995, "dur": 75499.338, + "args": { + "External id": 989892,"Record function id": 0, "Ev Idx": 7875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940859268.513, "dur": 8.153, + "args": { + "External id": 989893,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940859289.199, "dur": 5.501, + "args": { + "External id": 989894,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345940859313.175, "dur": 74269.807, + "args": { + "External id": 989895,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345940859329.628, "dur": 74238.965, + "args": { + "External id": 989896,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 7879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940859449.975, "dur": 19.195, + "args": { + "External id": 989897,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940859492.044, "dur": 74026.978, + "args": { + "External id": 989898,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 7881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940859498.310, "dur": 74019.396, + "args": { + "External id": 989899,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 7882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940859504.976, "dur": 9.085, + "args": { + "External id": 989900,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940859516.713, "dur": 73994.820, + "args": { + "External id": 989901,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 7884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940933709.239, "dur": 14.152, + "args": { + "External id": 989902,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 7885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940933713.496, "dur": 9.429, + "args": { + "External id": 989903,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345940933759.933, "dur": 453.098, + "args": { + "External id": 989904,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 7887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940933795.336, "dur": 410.967, + "args": { + "External id": 989905,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7888, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345940933808.386, "dur": 390.983, + "args": { + "External id": 989906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 7889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940934241.667, "dur": 2.809, + "args": { + "External id": 989907,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7890, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940934316.184, "dur": 8.353, + "args": { + "External id": 989908,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940934338.107, "dur": 37.658, + "args": { + "External id": 989909,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940934388.420, "dur": 1.905, + "args": { + "External id": 989910,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940934396.922, "dur": 13.574, + "args": { + "External id": 989911,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940934420.170, "dur": 3.828, + "args": { + "External id": 989912,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940934435.671, "dur": 13.560, + "args": { + "External id": 989913,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940934457.257, "dur": 1.340, + "args": { + "External id": 989914,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940934463.418, "dur": 12.082, + "args": { + "External id": 989915,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 7898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940934481.014, "dur": 1.045, + "args": { + "External id": 989916,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940934486.634, "dur": 13.002, + "args": { + "External id": 989917,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 7900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940934504.578, "dur": 2.708, + "args": { + "External id": 989918,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940934512.145, "dur": 11.275, + "args": { + "External id": 989919,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 7902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940934531.176, "dur": 1.287, + "args": { + "External id": 989920,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940934536.762, "dur": 11.972, + "args": { + "External id": 989921,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940934553.580, "dur": 1.080, + "args": { + "External id": 989922,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940934559.197, "dur": 11.151, + "args": { + "External id": 989923,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 7906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940934575.139, "dur": 1.105, + "args": { + "External id": 989924,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345940934580.571, "dur": 11.385, + "args": { + "External id": 989925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 7908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940934697.046, "dur": 3352.886, + "args": { + "External id": 989926,"Record function id": 0, "Ev Idx": 7909 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2338708, "tid": 2379421, + "ts": 6345940934722.288, "dur": 1223.850, + "args": { + "External id": 989927,"Record function id": 0, "Ev Idx": 7910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338708, "tid": 2379421, + "ts": 6345940934739.731, "dur": 424.381, + "args": { + "External id": 989928,"Record function id": 0, "Ev Idx": 7911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940934835.376, "dur": 4.936, + "args": { + "External id": 989929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 7912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940934844.006, "dur": 3.297, + "args": { + "External id": 989930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940934849.479, "dur": 0.921, + "args": { + "External id": 989931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940934853.060, "dur": 1.055, + "args": { + "External id": 989932,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940934856.095, "dur": 1.242, + "args": { + "External id": 989933,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940934859.359, "dur": 0.921, + "args": { + "External id": 989934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 7917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940934862.182, "dur": 1.012, + "args": { + "External id": 989935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 7918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940934867.578, "dur": 1.505, + "args": { + "External id": 989936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940934870.845, "dur": 0.848, + "args": { + "External id": 989937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940934873.576, "dur": 3.371, + "args": { + "External id": 989938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 7921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940934896.440, "dur": 226.480, + "args": { + "External id": 989939,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940934914.450, "dur": 201.272, + "args": { + "External id": 989940,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 7923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940934932.963, "dur": 14.957, + "args": { + "External id": 989941,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940934954.240, "dur": 93.000, + "args": { + "External id": 989942,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 7925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940934957.378, "dur": 89.439, + "args": { + "External id": 989943,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 7926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940934961.640, "dur": 6.784, + "args": { + "External id": 989944,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940934970.211, "dur": 75.460, + "args": { + "External id": 989945,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 7928 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2338708, "tid": 2379421, + "ts": 6345940935271.307, "dur": 665.335, + "args": { + "External id": 989946,"Record function id": 0, "Ev Idx": 7929 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338708, "tid": 2379421, + "ts": 6345940935291.897, "dur": 630.742, + "args": { + "External id": 989947,"Record function id": 0, "Ev Idx": 7930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940935364.406, "dur": 7.393, + "args": { + "External id": 989948,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345940935390.996, "dur": 35.269, + "args": { + "External id": 989949,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935396.032, "dur": 3.407, + "args": { + "External id": 989950,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935401.495, "dur": 0.669, + "args": { + "External id": 989951,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935403.608, "dur": 0.893, + "args": { + "External id": 989952,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935408.037, "dur": 0.544, + "args": { + "External id": 989953,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935409.521, "dur": 3.249, + "args": { + "External id": 989954,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935414.109, "dur": 0.419, + "args": { + "External id": 989955,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935416.821, "dur": 0.723, + "args": { + "External id": 989956,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935418.931, "dur": 0.410, + "args": { + "External id": 989957,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935420.308, "dur": 1.992, + "args": { + "External id": 989958,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940935441.581, "dur": 52.188, + "args": { + "External id": 989959,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345940935529.340, "dur": 126.485, + "args": { + "External id": 989960,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 7943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940935540.949, "dur": 5.178, + "args": { + "External id": 989961,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345940935552.147, "dur": 11.987, + "args": { + "External id": 989962,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345940935557.242, "dur": 6.450, + "args": { + "External id": 989963,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 7946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935561.102, "dur": 0.908, + "args": { + "External id": 989964,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 7947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345940935572.208, "dur": 27.790, + "args": { + "External id": 989965,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 7948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935574.098, "dur": 0.331, + "args": { + "External id": 989966,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935576.201, "dur": 4.175, + "args": { + "External id": 989967,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935581.546, "dur": 0.633, + "args": { + "External id": 989968,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935583.758, "dur": 0.578, + "args": { + "External id": 989969,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935587.496, "dur": 0.360, + "args": { + "External id": 989970,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935589.509, "dur": 0.290, + "args": { + "External id": 989971,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935590.909, "dur": 0.527, + "args": { + "External id": 989972,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935594.213, "dur": 0.687, + "args": { + "External id": 989973,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940935596.047, "dur": 0.384, + "args": { + "External id": 989974,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 7957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940935610.716, "dur": 33.852, + "args": { + "External id": 989975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 7958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345940935704.942, "dur": 139.182, + "args": { + "External id": 989976,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 7959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940935740.551, "dur": 99.610, + "args": { + "External id": 989977,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7960, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345940935751.473, "dur": 84.003, + "args": { + "External id": 989978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 7961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345940935863.672, "dur": 1.969, + "args": { + "External id": 989979,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7962, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940935962.882, "dur": 2042.774, + "args": { + "External id": 989980,"Sequence number": 10552517, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 7963 + } + }, + { + "ph": "f", "id": 402, "pid": 2338708, "tid": 2379421, "ts": 6345940935962.882, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940936155.351, "dur": 124.644, + "args": { + "External id": 989981,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 7964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345940936333.594, "dur": 46.436, + "args": { + "External id": 989982,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 7965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345940936401.851, "dur": 56.724, + "args": { + "External id": 989983,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 7966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940936471.554, "dur": 36.740, + "args": { + "External id": 989984,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940936515.442, "dur": 36.797, + "args": { + "External id": 989985,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940936560.174, "dur": 32.864, + "args": { + "External id": 989986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 7969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940936603.321, "dur": 34.004, + "args": { + "External id": 989987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 7970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345940936669.626, "dur": 27.237, + "args": { + "External id": 989988,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 7971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345940936723.081, "dur": 33.442, + "args": { + "External id": 989989,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345940936780.734, "dur": 20.683, + "args": { + "External id": 989990,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 7973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345940936821.994, "dur": 18.387, + "args": { + "External id": 989991,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 7974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940936851.066, "dur": 41.221, + "args": { + "External id": 989992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940936896.138, "dur": 36.859, + "args": { + "External id": 989993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345940936965.867, "dur": 382.490, + "args": { + "External id": 989994,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 7977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940937117.756, "dur": 9.164, + "args": { + "External id": 989995,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940937129.796, "dur": 3.681, + "args": { + "External id": 989996,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940937134.595, "dur": 2.640, + "args": { + "External id": 989997,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940937138.433, "dur": 5.316, + "args": { + "External id": 989998,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940937212.357, "dur": 6.435, + "args": { + "External id": 989999,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940937214.549, "dur": 3.688, + "args": { + "External id": 990000,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940937220.960, "dur": 38.855, + "args": { + "External id": 990001,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940937228.280, "dur": 2.037, + "args": { + "External id": 990002,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345940937261.804, "dur": 2.003, + "args": { + "External id": 990003,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940937262.973, "dur": 0.692, + "args": { + "External id": 990004,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 7987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345940937267.610, "dur": 17.244, + "args": { + "External id": 990005,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 7988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940937269.583, "dur": 0.584, + "args": { + "External id": 990006,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 7989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345940937399.972, "dur": 32.929, + "args": { + "External id": 990007,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345940937454.344, "dur": 21.016, + "args": { + "External id": 990008,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940937484.616, "dur": 57.505, + "args": { + "External id": 990009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940937549.801, "dur": 45.860, + "args": { + "External id": 990010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940937606.036, "dur": 23.561, + "args": { + "External id": 990011,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 7994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940937638.715, "dur": 36.432, + "args": { + "External id": 990012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 7995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940937683.942, "dur": 32.590, + "args": { + "External id": 990013,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 7996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345940937724.051, "dur": 34.498, + "args": { + "External id": 990014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 7997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345940937783.792, "dur": 25.330, + "args": { + "External id": 990015,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 7998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345940937830.457, "dur": 29.717, + "args": { + "External id": 990016,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345940937881.026, "dur": 19.740, + "args": { + "External id": 990017,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345940937920.066, "dur": 15.943, + "args": { + "External id": 990018,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345940937952.272, "dur": 17.465, + "args": { + "External id": 990019,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938112.984, "dur": 19.366, + "args": { + "External id": 990020,"Record function id": 0, "Ev Idx": 8003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938117.343, "dur": 13.496, + "args": { + "External id": 990021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938122.317, "dur": 6.860, + "args": { + "External id": 990022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938124.187, "dur": 4.782, + "args": { + "External id": 990023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938137.233, "dur": 5.896, + "args": { + "External id": 990024,"Record function id": 0, "Ev Idx": 8007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938139.136, "dur": 3.473, + "args": { + "External id": 990025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938140.237, "dur": 1.689, + "args": { + "External id": 990026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938141.000, "dur": 0.844, + "args": { + "External id": 990027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938147.361, "dur": 5.023, + "args": { + "External id": 990028,"Record function id": 0, "Ev Idx": 8011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938148.956, "dur": 2.933, + "args": { + "External id": 990029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938149.561, "dur": 1.808, + "args": { + "External id": 990030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938150.338, "dur": 0.939, + "args": { + "External id": 990031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938156.241, "dur": 4.814, + "args": { + "External id": 990032,"Record function id": 0, "Ev Idx": 8015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938157.598, "dur": 2.945, + "args": { + "External id": 990033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938158.432, "dur": 1.369, + "args": { + "External id": 990034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938158.979, "dur": 0.743, + "args": { + "External id": 990035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938164.692, "dur": 4.791, + "args": { + "External id": 990036,"Record function id": 0, "Ev Idx": 8019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938166.176, "dur": 2.828, + "args": { + "External id": 990037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938166.817, "dur": 1.366, + "args": { + "External id": 990038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938167.373, "dur": 0.723, + "args": { + "External id": 990039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938173.216, "dur": 8.777, + "args": { + "External id": 990040,"Record function id": 0, "Ev Idx": 8023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938174.472, "dur": 7.012, + "args": { + "External id": 990041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938175.015, "dur": 5.839, + "args": { + "External id": 990042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938177.289, "dur": 3.483, + "args": { + "External id": 990043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938185.976, "dur": 4.622, + "args": { + "External id": 990044,"Record function id": 0, "Ev Idx": 8027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938187.547, "dur": 2.548, + "args": { + "External id": 990045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938188.076, "dur": 1.451, + "args": { + "External id": 990046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938188.577, "dur": 0.865, + "args": { + "External id": 990047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938194.305, "dur": 4.706, + "args": { + "External id": 990048,"Record function id": 0, "Ev Idx": 8031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938195.458, "dur": 3.065, + "args": { + "External id": 990049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938196.019, "dur": 1.979, + "args": { + "External id": 990050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938197.135, "dur": 0.775, + "args": { + "External id": 990051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938202.827, "dur": 4.818, + "args": { + "External id": 990052,"Record function id": 0, "Ev Idx": 8035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345940938204.206, "dur": 2.941, + "args": { + "External id": 990053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938205.082, "dur": 1.273, + "args": { + "External id": 990054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345940938205.586, "dur": 0.694, + "args": { + "External id": 990055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940938213.052, "dur": 74897.372, + "args": { + "External id": 990056,"Record function id": 0, "Sequence number": 10552516, "Fwd thread id": 1, "Ev Idx": 8039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345940938215.242, "dur": 74881.202, + "args": { + "External id": 990057,"Sequence number": 10552516, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8040 + } + }, + { + "ph": "f", "id": 403, "pid": 2338708, "tid": 2379421, "ts": 6345940938215.242, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2338708, "tid": 2379421, + "ts": 6345940938251.162, "dur": 43.065, + "args": { + "External id": 990058,"Record function id": 0, "Ev Idx": 8041 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2338708, "tid": 2379421, + "ts": 6345940938303.609, "dur": 73.148, + "args": { + "External id": 990059,"Record function id": 0, "Ev Idx": 8042 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2338708, "tid": 2379421, + "ts": 6345940938384.097, "dur": 74663.547, + "args": { + "External id": 990060,"Record function id": 0, "Ev Idx": 8043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940938486.813, "dur": 8.390, + "args": { + "External id": 990061,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345940938505.824, "dur": 5.240, + "args": { + "External id": 990062,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345940938531.453, "dur": 73271.509, + "args": { + "External id": 990063,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345940938546.747, "dur": 73239.814, + "args": { + "External id": 990064,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345940938662.114, "dur": 19.266, + "args": { + "External id": 990065,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345940938704.195, "dur": 73025.600, + "args": { + "External id": 990066,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345940938707.485, "dur": 73020.730, + "args": { + "External id": 990067,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345940938714.386, "dur": 9.054, + "args": { + "External id": 990068,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345940938728.882, "dur": 72991.373, + "args": { + "External id": 990069,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941011942.358, "dur": 17.233, + "args": { + "External id": 990070,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941011946.973, "dur": 12.159, + "args": { + "External id": 990071,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941012000.214, "dur": 519.085, + "args": { + "External id": 990072,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941012088.231, "dur": 424.273, + "args": { + "External id": 990073,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8056, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941012105.343, "dur": 397.961, + "args": { + "External id": 990074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941012553.712, "dur": 2.883, + "args": { + "External id": 990075,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8058, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941012645.798, "dur": 9.603, + "args": { + "External id": 990076,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941012673.103, "dur": 39.510, + "args": { + "External id": 990077,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941012727.800, "dur": 5.591, + "args": { + "External id": 990078,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941012740.915, "dur": 16.068, + "args": { + "External id": 990079,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941012764.585, "dur": 1.048, + "args": { + "External id": 990080,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941012772.179, "dur": 14.809, + "args": { + "External id": 990081,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941012793.903, "dur": 0.869, + "args": { + "External id": 990082,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941012800.458, "dur": 13.700, + "args": { + "External id": 990083,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941012821.016, "dur": 1.019, + "args": { + "External id": 990084,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941012827.484, "dur": 14.501, + "args": { + "External id": 990085,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941012848.141, "dur": 1.243, + "args": { + "External id": 990086,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941012857.184, "dur": 14.145, + "args": { + "External id": 990087,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941012877.850, "dur": 0.835, + "args": { + "External id": 990088,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941012883.998, "dur": 13.678, + "args": { + "External id": 990089,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941012903.183, "dur": 0.932, + "args": { + "External id": 990090,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941012909.943, "dur": 13.158, + "args": { + "External id": 990091,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941012930.309, "dur": 0.850, + "args": { + "External id": 990092,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941012936.943, "dur": 14.624, + "args": { + "External id": 990093,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941013134.263, "dur": 3418.003, + "args": { + "External id": 990094,"Record function id": 0, "Ev Idx": 8077 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2338708, "tid": 2379421, + "ts": 6345941013163.746, "dur": 1275.851, + "args": { + "External id": 990095,"Record function id": 0, "Ev Idx": 8078 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338708, "tid": 2379421, + "ts": 6345941013184.856, "dur": 401.260, + "args": { + "External id": 990096,"Record function id": 0, "Ev Idx": 8079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941013292.959, "dur": 8.954, + "args": { + "External id": 990097,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941013307.111, "dur": 1.149, + "args": { + "External id": 990098,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941013310.574, "dur": 0.878, + "args": { + "External id": 990099,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941013313.815, "dur": 0.907, + "args": { + "External id": 990100,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941013316.515, "dur": 0.857, + "args": { + "External id": 990101,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941013319.176, "dur": 1.044, + "args": { + "External id": 990102,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941013324.680, "dur": 1.059, + "args": { + "External id": 990103,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941013327.343, "dur": 1.173, + "args": { + "External id": 990104,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941013330.388, "dur": 3.464, + "args": { + "External id": 990105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941013335.676, "dur": 1.074, + "args": { + "External id": 990106,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941013359.033, "dur": 187.354, + "args": { + "External id": 990107,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941013380.640, "dur": 159.688, + "args": { + "External id": 990108,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941013400.950, "dur": 17.252, + "args": { + "External id": 990109,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941013423.663, "dur": 84.941, + "args": { + "External id": 990110,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941013426.854, "dur": 81.283, + "args": { + "External id": 990111,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013433.103, "dur": 8.248, + "args": { + "External id": 990112,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941013444.233, "dur": 63.110, + "args": { + "External id": 990113,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2338708, "tid": 2379421, + "ts": 6345941013691.268, "dur": 738.946, + "args": { + "External id": 990114,"Record function id": 0, "Ev Idx": 8097 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338708, "tid": 2379421, + "ts": 6345941013711.977, "dur": 701.659, + "args": { + "External id": 990115,"Record function id": 0, "Ev Idx": 8098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941013780.513, "dur": 7.080, + "args": { + "External id": 990116,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941013808.584, "dur": 38.278, + "args": { + "External id": 990117,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013815.381, "dur": 2.222, + "args": { + "External id": 990118,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013820.672, "dur": 2.766, + "args": { + "External id": 990119,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013825.078, "dur": 0.677, + "args": { + "External id": 990120,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013827.501, "dur": 2.931, + "args": { + "External id": 990121,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013834.104, "dur": 0.290, + "args": { + "External id": 990122,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013835.896, "dur": 0.370, + "args": { + "External id": 990123,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013837.795, "dur": 0.402, + "args": { + "External id": 990124,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013841.292, "dur": 0.630, + "args": { + "External id": 990125,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013843.289, "dur": 0.343, + "args": { + "External id": 990126,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941013859.663, "dur": 47.039, + "args": { + "External id": 990127,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941013942.088, "dur": 194.801, + "args": { + "External id": 990128,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941013953.108, "dur": 3.896, + "args": { + "External id": 990129,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941013962.692, "dur": 11.096, + "args": { + "External id": 990130,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941013967.534, "dur": 5.749, + "args": { + "External id": 990131,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013971.067, "dur": 0.731, + "args": { + "External id": 990132,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941013981.672, "dur": 52.211, + "args": { + "External id": 990133,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013983.764, "dur": 3.002, + "args": { + "External id": 990134,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013989.604, "dur": 0.592, + "args": { + "External id": 990135,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013991.381, "dur": 0.486, + "args": { + "External id": 990136,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013993.338, "dur": 2.059, + "args": { + "External id": 990137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013996.779, "dur": 0.604, + "args": { + "External id": 990138,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941013998.524, "dur": 0.374, + "args": { + "External id": 990139,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941014001.766, "dur": 0.382, + "args": { + "External id": 990140,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941014003.225, "dur": 0.370, + "args": { + "External id": 990141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941014004.682, "dur": 23.069, + "args": { + "External id": 990142,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941014050.392, "dur": 75.287, + "args": { + "External id": 990143,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941014192.103, "dur": 136.977, + "args": { + "External id": 990144,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941014227.211, "dur": 98.137, + "args": { + "External id": 990145,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8128, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941014237.476, "dur": 82.667, + "args": { + "External id": 990146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941014348.547, "dur": 2.230, + "args": { + "External id": 990147,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8130, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941014447.815, "dur": 2078.860, + "args": { + "External id": 990148,"Sequence number": 10552515, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8131 + } + }, + { + "ph": "f", "id": 404, "pid": 2338708, "tid": 2379421, "ts": 6345941014447.815, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941014579.808, "dur": 122.376, + "args": { + "External id": 990149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941014757.276, "dur": 44.941, + "args": { + "External id": 990150,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941014826.560, "dur": 58.384, + "args": { + "External id": 990151,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941014897.005, "dur": 36.219, + "args": { + "External id": 990152,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941014940.465, "dur": 37.279, + "args": { + "External id": 990153,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941014985.252, "dur": 55.156, + "args": { + "External id": 990154,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941015096.330, "dur": 42.498, + "args": { + "External id": 990155,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941015175.364, "dur": 29.335, + "args": { + "External id": 990156,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941015229.390, "dur": 35.339, + "args": { + "External id": 990157,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941015291.757, "dur": 23.196, + "args": { + "External id": 990158,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941015331.953, "dur": 18.859, + "args": { + "External id": 990159,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941015361.808, "dur": 59.031, + "args": { + "External id": 990160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941015431.513, "dur": 47.156, + "args": { + "External id": 990161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941015516.923, "dur": 295.779, + "args": { + "External id": 990162,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941015621.109, "dur": 7.709, + "args": { + "External id": 990163,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941015631.138, "dur": 3.677, + "args": { + "External id": 990164,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941015636.545, "dur": 2.888, + "args": { + "External id": 990165,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941015640.418, "dur": 7.460, + "args": { + "External id": 990166,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941015700.417, "dur": 5.464, + "args": { + "External id": 990167,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941015702.386, "dur": 3.158, + "args": { + "External id": 990168,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941015707.795, "dur": 36.182, + "args": { + "External id": 990169,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941015714.200, "dur": 2.079, + "args": { + "External id": 990170,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941015746.015, "dur": 1.695, + "args": { + "External id": 990171,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941015746.867, "dur": 0.766, + "args": { + "External id": 990172,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941015748.822, "dur": 16.253, + "args": { + "External id": 990173,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941015750.822, "dur": 0.461, + "args": { + "External id": 990174,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941015854.102, "dur": 29.464, + "args": { + "External id": 990175,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941015906.474, "dur": 17.905, + "args": { + "External id": 990176,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941015932.911, "dur": 42.280, + "args": { + "External id": 990177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941015982.275, "dur": 61.769, + "args": { + "External id": 990178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941016097.200, "dur": 31.423, + "args": { + "External id": 990179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941016138.825, "dur": 37.478, + "args": { + "External id": 990180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941016185.084, "dur": 31.909, + "args": { + "External id": 990181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941016223.851, "dur": 33.082, + "args": { + "External id": 990182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941016285.935, "dur": 34.429, + "args": { + "External id": 990183,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941016340.478, "dur": 32.240, + "args": { + "External id": 990184,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941016393.403, "dur": 21.239, + "args": { + "External id": 990185,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941016434.549, "dur": 16.747, + "args": { + "External id": 990186,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941016469.461, "dur": 19.615, + "args": { + "External id": 990187,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016577.874, "dur": 17.078, + "args": { + "External id": 990188,"Record function id": 0, "Ev Idx": 8171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016581.870, "dur": 12.007, + "args": { + "External id": 990189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016586.687, "dur": 5.903, + "args": { + "External id": 990190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016588.244, "dur": 4.199, + "args": { + "External id": 990191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016599.501, "dur": 5.465, + "args": { + "External id": 990192,"Record function id": 0, "Ev Idx": 8175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016601.022, "dur": 3.406, + "args": { + "External id": 990193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016602.086, "dur": 1.607, + "args": { + "External id": 990194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016602.822, "dur": 0.739, + "args": { + "External id": 990195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016609.029, "dur": 5.031, + "args": { + "External id": 990196,"Record function id": 0, "Ev Idx": 8179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016610.653, "dur": 2.913, + "args": { + "External id": 990197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016611.481, "dur": 1.620, + "args": { + "External id": 990198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016612.111, "dur": 0.864, + "args": { + "External id": 990199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016617.835, "dur": 4.966, + "args": { + "External id": 990200,"Record function id": 0, "Ev Idx": 8183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016619.372, "dur": 2.977, + "args": { + "External id": 990201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016620.065, "dur": 1.558, + "args": { + "External id": 990202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016620.865, "dur": 0.678, + "args": { + "External id": 990203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016626.409, "dur": 4.641, + "args": { + "External id": 990204,"Record function id": 0, "Ev Idx": 8187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016627.709, "dur": 2.892, + "args": { + "External id": 990205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016628.420, "dur": 1.607, + "args": { + "External id": 990206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016628.997, "dur": 0.952, + "args": { + "External id": 990207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016634.666, "dur": 41.427, + "args": { + "External id": 990208,"Record function id": 0, "Ev Idx": 8191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016670.439, "dur": 5.137, + "args": { + "External id": 990209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016671.288, "dur": 3.653, + "args": { + "External id": 990210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016674.033, "dur": 0.745, + "args": { + "External id": 990211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016679.924, "dur": 7.237, + "args": { + "External id": 990212,"Record function id": 0, "Ev Idx": 8195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016682.023, "dur": 4.614, + "args": { + "External id": 990213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016682.712, "dur": 3.458, + "args": { + "External id": 990214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016683.127, "dur": 2.973, + "args": { + "External id": 990215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016690.894, "dur": 4.211, + "args": { + "External id": 990216,"Record function id": 0, "Ev Idx": 8199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016692.281, "dur": 2.357, + "args": { + "External id": 990217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016692.849, "dur": 1.286, + "args": { + "External id": 990218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016693.163, "dur": 0.877, + "args": { + "External id": 990219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016698.717, "dur": 4.420, + "args": { + "External id": 990220,"Record function id": 0, "Ev Idx": 8203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941016700.186, "dur": 2.478, + "args": { + "External id": 990221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016700.758, "dur": 1.191, + "args": { + "External id": 990222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941016701.229, "dur": 0.641, + "args": { + "External id": 990223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941016707.896, "dur": 69022.145, + "args": { + "External id": 990224,"Record function id": 0, "Sequence number": 10552514, "Fwd thread id": 1, "Ev Idx": 8207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941016709.429, "dur": 69010.250, + "args": { + "External id": 990225,"Sequence number": 10552514, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8208 + } + }, + { + "ph": "f", "id": 405, "pid": 2338708, "tid": 2379421, "ts": 6345941016709.429, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2338708, "tid": 2379421, + "ts": 6345941016744.210, "dur": 47.114, + "args": { + "External id": 990226,"Record function id": 0, "Ev Idx": 8209 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2338708, "tid": 2379421, + "ts": 6345941016800.258, "dur": 78.342, + "args": { + "External id": 990227,"Record function id": 0, "Ev Idx": 8210 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2338708, "tid": 2379421, + "ts": 6345941016885.144, "dur": 68825.455, + "args": { + "External id": 990228,"Record function id": 0, "Ev Idx": 8211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941016987.878, "dur": 7.772, + "args": { + "External id": 990229,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941017027.709, "dur": 6.679, + "args": { + "External id": 990230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941017095.469, "dur": 67504.428, + "args": { + "External id": 990231,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941017116.409, "dur": 67468.602, + "args": { + "External id": 990232,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941017256.445, "dur": 24.458, + "args": { + "External id": 990233,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941017305.369, "dur": 67230.609, + "args": { + "External id": 990234,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941017309.908, "dur": 67224.769, + "args": { + "External id": 990235,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941017315.219, "dur": 12.400, + "args": { + "External id": 990236,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941017330.002, "dur": 67199.331, + "args": { + "External id": 990237,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941084727.343, "dur": 13.371, + "args": { + "External id": 990238,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941084731.543, "dur": 8.697, + "args": { + "External id": 990239,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941084773.886, "dur": 475.792, + "args": { + "External id": 990240,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941084811.310, "dur": 431.736, + "args": { + "External id": 990241,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8224, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941084826.773, "dur": 409.094, + "args": { + "External id": 990242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941085279.034, "dur": 2.536, + "args": { + "External id": 990243,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8226, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941085356.141, "dur": 7.981, + "args": { + "External id": 990244,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941085378.288, "dur": 40.472, + "args": { + "External id": 990245,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941085430.897, "dur": 4.442, + "args": { + "External id": 990246,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941085442.150, "dur": 13.473, + "args": { + "External id": 990247,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941085462.417, "dur": 0.897, + "args": { + "External id": 990248,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941085470.921, "dur": 13.329, + "args": { + "External id": 990249,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941085489.470, "dur": 0.897, + "args": { + "External id": 990250,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941085495.704, "dur": 11.602, + "args": { + "External id": 990251,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941085513.267, "dur": 0.875, + "args": { + "External id": 990252,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941085519.428, "dur": 14.439, + "args": { + "External id": 990253,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941085538.821, "dur": 1.686, + "args": { + "External id": 990254,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941085545.649, "dur": 13.479, + "args": { + "External id": 990255,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941085567.656, "dur": 1.031, + "args": { + "External id": 990256,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941085573.153, "dur": 14.146, + "args": { + "External id": 990257,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941085592.603, "dur": 0.935, + "args": { + "External id": 990258,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941085598.668, "dur": 13.801, + "args": { + "External id": 990259,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941085617.414, "dur": 0.944, + "args": { + "External id": 990260,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941085625.244, "dur": 13.970, + "args": { + "External id": 990261,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941085747.487, "dur": 3521.572, + "args": { + "External id": 990262,"Record function id": 0, "Ev Idx": 8245 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2338708, "tid": 2379421, + "ts": 6345941085772.388, "dur": 1405.844, + "args": { + "External id": 990263,"Record function id": 0, "Ev Idx": 8246 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338708, "tid": 2379421, + "ts": 6345941085790.486, "dur": 501.563, + "args": { + "External id": 990264,"Record function id": 0, "Ev Idx": 8247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941085895.884, "dur": 8.503, + "args": { + "External id": 990265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941085909.309, "dur": 0.992, + "args": { + "External id": 990266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941085912.898, "dur": 0.928, + "args": { + "External id": 990267,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941085916.174, "dur": 0.841, + "args": { + "External id": 990268,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941085918.801, "dur": 1.131, + "args": { + "External id": 990269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941085922.267, "dur": 1.185, + "args": { + "External id": 990270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941085925.252, "dur": 1.358, + "args": { + "External id": 990271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941085930.466, "dur": 1.363, + "args": { + "External id": 990272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941085933.824, "dur": 3.327, + "args": { + "External id": 990273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941085938.910, "dur": 1.114, + "args": { + "External id": 990274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941085960.744, "dur": 283.629, + "args": { + "External id": 990275,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941085981.247, "dur": 254.213, + "args": { + "External id": 990276,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941086004.377, "dur": 43.658, + "args": { + "External id": 990277,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941086108.120, "dur": 91.663, + "args": { + "External id": 990278,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941086111.832, "dur": 87.509, + "args": { + "External id": 990279,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086117.372, "dur": 9.857, + "args": { + "External id": 990280,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941086130.916, "dur": 67.549, + "args": { + "External id": 990281,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8264 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2338708, "tid": 2379421, + "ts": 6345941086407.258, "dur": 761.347, + "args": { + "External id": 990282,"Record function id": 0, "Ev Idx": 8265 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338708, "tid": 2379421, + "ts": 6345941086430.364, "dur": 721.221, + "args": { + "External id": 990283,"Record function id": 0, "Ev Idx": 8266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941086505.566, "dur": 7.370, + "args": { + "External id": 990284,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941086532.532, "dur": 34.532, + "args": { + "External id": 990285,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086538.565, "dur": 2.176, + "args": { + "External id": 990286,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086543.406, "dur": 2.533, + "args": { + "External id": 990287,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086547.166, "dur": 0.625, + "args": { + "External id": 990288,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086549.014, "dur": 2.932, + "args": { + "External id": 990289,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086554.640, "dur": 0.644, + "args": { + "External id": 990290,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086556.447, "dur": 0.255, + "args": { + "External id": 990291,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086558.091, "dur": 0.640, + "args": { + "External id": 990292,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086561.000, "dur": 0.297, + "args": { + "External id": 990293,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086562.864, "dur": 0.398, + "args": { + "External id": 990294,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941086579.843, "dur": 50.658, + "args": { + "External id": 990295,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941086670.618, "dur": 130.177, + "args": { + "External id": 990296,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941086682.072, "dur": 4.102, + "args": { + "External id": 990297,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941086692.469, "dur": 11.190, + "args": { + "External id": 990298,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941086697.154, "dur": 6.005, + "args": { + "External id": 990299,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086700.993, "dur": 0.718, + "args": { + "External id": 990300,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941086716.080, "dur": 27.171, + "args": { + "External id": 990301,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086717.999, "dur": 2.943, + "args": { + "External id": 990302,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086723.487, "dur": 0.557, + "args": { + "External id": 990303,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086725.096, "dur": 0.345, + "args": { + "External id": 990304,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086726.610, "dur": 1.454, + "args": { + "External id": 990305,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086729.506, "dur": 0.340, + "args": { + "External id": 990306,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086731.062, "dur": 0.332, + "args": { + "External id": 990307,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086734.213, "dur": 0.276, + "args": { + "External id": 990308,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086735.536, "dur": 0.403, + "args": { + "External id": 990309,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941086737.284, "dur": 2.433, + "args": { + "External id": 990310,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941086755.417, "dur": 37.029, + "args": { + "External id": 990311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941086850.583, "dur": 143.218, + "args": { + "External id": 990312,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941086887.272, "dur": 102.719, + "args": { + "External id": 990313,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8296, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941086897.652, "dur": 87.312, + "args": { + "External id": 990314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941087035.856, "dur": 4.423, + "args": { + "External id": 990315,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8298, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941087186.680, "dur": 2057.018, + "args": { + "External id": 990316,"Sequence number": 10552513, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8299 + } + }, + { + "ph": "f", "id": 406, "pid": 2338708, "tid": 2379421, "ts": 6345941087186.680, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941087316.250, "dur": 129.464, + "args": { + "External id": 990317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941087494.300, "dur": 45.624, + "args": { + "External id": 990318,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941087563.154, "dur": 56.690, + "args": { + "External id": 990319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941087632.163, "dur": 34.858, + "args": { + "External id": 990320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941087674.202, "dur": 36.594, + "args": { + "External id": 990321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941087718.297, "dur": 31.931, + "args": { + "External id": 990322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941087761.767, "dur": 33.274, + "args": { + "External id": 990323,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941087823.626, "dur": 27.318, + "args": { + "External id": 990324,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941087871.962, "dur": 34.521, + "args": { + "External id": 990325,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941087933.904, "dur": 23.452, + "args": { + "External id": 990326,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941087973.533, "dur": 18.295, + "args": { + "External id": 990327,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941088002.708, "dur": 107.956, + "args": { + "External id": 990328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941088117.974, "dur": 42.303, + "args": { + "External id": 990329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941088215.596, "dur": 306.039, + "args": { + "External id": 990330,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941088315.336, "dur": 8.403, + "args": { + "External id": 990331,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941088326.098, "dur": 3.990, + "args": { + "External id": 990332,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941088331.513, "dur": 2.594, + "args": { + "External id": 990333,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941088335.407, "dur": 4.467, + "args": { + "External id": 990334,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941088398.883, "dur": 8.863, + "args": { + "External id": 990335,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941088403.987, "dur": 3.497, + "args": { + "External id": 990336,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941088409.829, "dur": 38.747, + "args": { + "External id": 990337,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941088418.225, "dur": 2.205, + "args": { + "External id": 990338,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941088450.677, "dur": 1.755, + "args": { + "External id": 990339,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941088451.682, "dur": 0.675, + "args": { + "External id": 990340,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941088453.592, "dur": 15.579, + "args": { + "External id": 990341,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941088455.874, "dur": 0.731, + "args": { + "External id": 990342,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941088570.216, "dur": 29.468, + "args": { + "External id": 990343,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941088627.610, "dur": 18.830, + "args": { + "External id": 990344,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941088655.682, "dur": 46.880, + "args": { + "External id": 990345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941088710.108, "dur": 43.594, + "args": { + "External id": 990346,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941088762.087, "dur": 25.233, + "args": { + "External id": 990347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941088795.967, "dur": 35.126, + "args": { + "External id": 990348,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941088839.486, "dur": 31.325, + "args": { + "External id": 990349,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941088879.886, "dur": 33.942, + "args": { + "External id": 990350,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941088936.930, "dur": 27.577, + "args": { + "External id": 990351,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941088985.747, "dur": 51.571, + "args": { + "External id": 990352,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941089096.868, "dur": 25.917, + "args": { + "External id": 990353,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941089146.872, "dur": 18.063, + "args": { + "External id": 990354,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941089184.832, "dur": 20.822, + "args": { + "External id": 990355,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089294.172, "dur": 18.055, + "args": { + "External id": 990356,"Record function id": 0, "Ev Idx": 8339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089298.009, "dur": 13.149, + "args": { + "External id": 990357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089303.237, "dur": 6.595, + "args": { + "External id": 990358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089305.072, "dur": 4.646, + "args": { + "External id": 990359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089316.782, "dur": 5.343, + "args": { + "External id": 990360,"Record function id": 0, "Ev Idx": 8343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089318.591, "dur": 3.030, + "args": { + "External id": 990361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089319.700, "dur": 1.399, + "args": { + "External id": 990362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089320.236, "dur": 0.757, + "args": { + "External id": 990363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089326.138, "dur": 5.436, + "args": { + "External id": 990364,"Record function id": 0, "Ev Idx": 8347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089327.687, "dur": 3.378, + "args": { + "External id": 990365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089328.777, "dur": 1.777, + "args": { + "External id": 990366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089329.519, "dur": 0.901, + "args": { + "External id": 990367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089335.665, "dur": 5.661, + "args": { + "External id": 990368,"Record function id": 0, "Ev Idx": 8351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089337.839, "dur": 3.000, + "args": { + "External id": 990369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089339.112, "dur": 1.099, + "args": { + "External id": 990370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089339.424, "dur": 0.687, + "args": { + "External id": 990371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089345.062, "dur": 4.664, + "args": { + "External id": 990372,"Record function id": 0, "Ev Idx": 8355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089346.685, "dur": 2.520, + "args": { + "External id": 990373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089347.308, "dur": 1.213, + "args": { + "External id": 990374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089347.592, "dur": 0.806, + "args": { + "External id": 990375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089353.440, "dur": 7.147, + "args": { + "External id": 990376,"Record function id": 0, "Ev Idx": 8359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089354.721, "dur": 5.341, + "args": { + "External id": 990377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089355.459, "dur": 3.939, + "args": { + "External id": 990378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089355.873, "dur": 3.436, + "args": { + "External id": 990379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089364.389, "dur": 4.779, + "args": { + "External id": 990380,"Record function id": 0, "Ev Idx": 8363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089365.930, "dur": 2.738, + "args": { + "External id": 990381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089366.470, "dur": 1.329, + "args": { + "External id": 990382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089366.774, "dur": 0.948, + "args": { + "External id": 990383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089372.890, "dur": 6.640, + "args": { + "External id": 990384,"Record function id": 0, "Ev Idx": 8367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089374.381, "dur": 4.641, + "args": { + "External id": 990385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089374.920, "dur": 3.611, + "args": { + "External id": 990386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089377.729, "dur": 0.720, + "args": { + "External id": 990387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089383.270, "dur": 4.296, + "args": { + "External id": 990388,"Record function id": 0, "Ev Idx": 8371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941089384.371, "dur": 2.708, + "args": { + "External id": 990389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089385.020, "dur": 1.428, + "args": { + "External id": 990390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941089385.418, "dur": 0.921, + "args": { + "External id": 990391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941089392.762, "dur": 65307.290, + "args": { + "External id": 990392,"Record function id": 0, "Sequence number": 10552512, "Fwd thread id": 1, "Ev Idx": 8375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941089394.685, "dur": 65294.513, + "args": { + "External id": 990393,"Sequence number": 10552512, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8376 + } + }, + { + "ph": "f", "id": 407, "pid": 2338708, "tid": 2379421, "ts": 6345941089394.685, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2338708, "tid": 2379421, + "ts": 6345941089429.053, "dur": 43.968, + "args": { + "External id": 990394,"Record function id": 0, "Ev Idx": 8377 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2338708, "tid": 2379421, + "ts": 6345941089481.941, "dur": 77.387, + "args": { + "External id": 990395,"Record function id": 0, "Ev Idx": 8378 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2338708, "tid": 2379421, + "ts": 6345941089566.849, "dur": 65113.007, + "args": { + "External id": 990396,"Record function id": 0, "Ev Idx": 8379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941089666.822, "dur": 8.092, + "args": { + "External id": 990397,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941089686.373, "dur": 5.505, + "args": { + "External id": 990398,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941089710.670, "dur": 63856.152, + "args": { + "External id": 990399,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941089729.050, "dur": 63822.998, + "args": { + "External id": 990400,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941089869.640, "dur": 23.012, + "args": { + "External id": 990401,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941089918.065, "dur": 63588.197, + "args": { + "External id": 990402,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941089924.828, "dur": 63580.676, + "args": { + "External id": 990403,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941089929.877, "dur": 12.047, + "args": { + "External id": 990404,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941089944.338, "dur": 63559.337, + "args": { + "External id": 990405,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941153693.697, "dur": 13.436, + "args": { + "External id": 990406,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941153697.465, "dur": 9.126, + "args": { + "External id": 990407,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941153742.674, "dur": 470.532, + "args": { + "External id": 990408,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941153779.773, "dur": 426.990, + "args": { + "External id": 990409,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8392, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941153793.695, "dur": 405.331, + "args": { + "External id": 990410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941154241.163, "dur": 2.665, + "args": { + "External id": 990411,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8394, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941154320.418, "dur": 7.979, + "args": { + "External id": 990412,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941154342.451, "dur": 40.967, + "args": { + "External id": 990413,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941154395.720, "dur": 4.985, + "args": { + "External id": 990414,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941154407.132, "dur": 16.515, + "args": { + "External id": 990415,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941154430.558, "dur": 1.321, + "args": { + "External id": 990416,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941154437.192, "dur": 15.396, + "args": { + "External id": 990417,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941154460.252, "dur": 1.120, + "args": { + "External id": 990418,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941154466.539, "dur": 14.002, + "args": { + "External id": 990419,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941154486.614, "dur": 1.221, + "args": { + "External id": 990420,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941154492.797, "dur": 14.344, + "args": { + "External id": 990421,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941154512.492, "dur": 1.855, + "args": { + "External id": 990422,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941154519.247, "dur": 13.612, + "args": { + "External id": 990423,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941154537.787, "dur": 1.078, + "args": { + "External id": 990424,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941154545.244, "dur": 14.096, + "args": { + "External id": 990425,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941154564.668, "dur": 1.233, + "args": { + "External id": 990426,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941154570.451, "dur": 13.061, + "args": { + "External id": 990427,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941154588.712, "dur": 1.262, + "args": { + "External id": 990428,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941154594.395, "dur": 15.168, + "args": { + "External id": 990429,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941154716.551, "dur": 3472.263, + "args": { + "External id": 990430,"Record function id": 0, "Ev Idx": 8413 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2338708, "tid": 2379421, + "ts": 6345941154740.818, "dur": 1241.778, + "args": { + "External id": 990431,"Record function id": 0, "Ev Idx": 8414 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338708, "tid": 2379421, + "ts": 6345941154757.259, "dur": 438.389, + "args": { + "External id": 990432,"Record function id": 0, "Ev Idx": 8415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941154858.273, "dur": 6.914, + "args": { + "External id": 990433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941154869.066, "dur": 0.987, + "args": { + "External id": 990434,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941154872.580, "dur": 0.883, + "args": { + "External id": 990435,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941154875.746, "dur": 1.055, + "args": { + "External id": 990436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941154880.841, "dur": 0.905, + "args": { + "External id": 990437,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941154884.135, "dur": 1.033, + "args": { + "External id": 990438,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941154886.904, "dur": 1.059, + "args": { + "External id": 990439,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941154889.713, "dur": 1.195, + "args": { + "External id": 990440,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941154894.746, "dur": 3.363, + "args": { + "External id": 990441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941154900.066, "dur": 0.874, + "args": { + "External id": 990442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941154920.169, "dur": 235.523, + "args": { + "External id": 990443,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941154939.584, "dur": 210.240, + "args": { + "External id": 990444,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941154959.445, "dur": 17.891, + "args": { + "External id": 990445,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941154982.435, "dur": 136.405, + "args": { + "External id": 990446,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941154988.021, "dur": 130.280, + "args": { + "External id": 990447,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941154992.072, "dur": 5.476, + "args": { + "External id": 990448,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941154999.339, "dur": 117.461, + "args": { + "External id": 990449,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8432 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2338708, "tid": 2379421, + "ts": 6345941155301.360, "dur": 671.548, + "args": { + "External id": 990450,"Record function id": 0, "Ev Idx": 8433 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338708, "tid": 2379421, + "ts": 6345941155319.840, "dur": 638.652, + "args": { + "External id": 990451,"Record function id": 0, "Ev Idx": 8434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941155394.238, "dur": 7.985, + "args": { + "External id": 990452,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941155420.728, "dur": 34.395, + "args": { + "External id": 990453,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155426.481, "dur": 2.133, + "args": { + "External id": 990454,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155431.093, "dur": 2.048, + "args": { + "External id": 990455,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155435.064, "dur": 0.668, + "args": { + "External id": 990456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155437.019, "dur": 3.054, + "args": { + "External id": 990457,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155442.756, "dur": 0.645, + "args": { + "External id": 990458,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155444.336, "dur": 0.519, + "args": { + "External id": 990459,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155446.360, "dur": 0.429, + "args": { + "External id": 990460,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155449.328, "dur": 0.299, + "args": { + "External id": 990461,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155450.818, "dur": 0.743, + "args": { + "External id": 990462,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941155467.978, "dur": 49.217, + "args": { + "External id": 990463,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941155554.424, "dur": 128.456, + "args": { + "External id": 990464,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941155566.908, "dur": 4.290, + "args": { + "External id": 990465,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941155577.774, "dur": 11.772, + "args": { + "External id": 990466,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941155582.682, "dur": 6.223, + "args": { + "External id": 990467,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155586.522, "dur": 0.667, + "args": { + "External id": 990468,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941155598.327, "dur": 29.904, + "args": { + "External id": 990469,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155600.449, "dur": 3.319, + "args": { + "External id": 990470,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155606.108, "dur": 0.377, + "args": { + "External id": 990471,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155608.127, "dur": 0.560, + "args": { + "External id": 990472,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155609.871, "dur": 1.731, + "args": { + "External id": 990473,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155612.953, "dur": 0.619, + "args": { + "External id": 990474,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155614.924, "dur": 0.541, + "args": { + "External id": 990475,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155618.681, "dur": 0.394, + "args": { + "External id": 990476,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155620.358, "dur": 0.590, + "args": { + "External id": 990477,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941155622.137, "dur": 2.468, + "args": { + "External id": 990478,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941155640.235, "dur": 34.014, + "args": { + "External id": 990479,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941155733.180, "dur": 140.603, + "args": { + "External id": 990480,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941155765.937, "dur": 103.618, + "args": { + "External id": 990481,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8464, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941155776.302, "dur": 88.476, + "args": { + "External id": 990482,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941155896.838, "dur": 2.265, + "args": { + "External id": 990483,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8466, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941155991.145, "dur": 2173.095, + "args": { + "External id": 990484,"Sequence number": 10552511, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8467 + } + }, + { + "ph": "f", "id": 408, "pid": 2338708, "tid": 2379421, "ts": 6345941155991.145, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941156192.640, "dur": 127.291, + "args": { + "External id": 990485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941156373.040, "dur": 49.324, + "args": { + "External id": 990486,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941156446.541, "dur": 56.993, + "args": { + "External id": 990487,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941156515.964, "dur": 35.628, + "args": { + "External id": 990488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941156559.192, "dur": 36.791, + "args": { + "External id": 990489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941156604.024, "dur": 31.318, + "args": { + "External id": 990490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941156647.306, "dur": 33.376, + "args": { + "External id": 990491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941156711.131, "dur": 27.288, + "args": { + "External id": 990492,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941156759.499, "dur": 31.785, + "args": { + "External id": 990493,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941156818.268, "dur": 21.530, + "args": { + "External id": 990494,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941156858.313, "dur": 16.506, + "args": { + "External id": 990495,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941156886.218, "dur": 42.030, + "args": { + "External id": 990496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941156932.497, "dur": 37.148, + "args": { + "External id": 990497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941157003.197, "dur": 404.322, + "args": { + "External id": 990498,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941157155.814, "dur": 9.116, + "args": { + "External id": 990499,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941157167.394, "dur": 4.220, + "args": { + "External id": 990500,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941157172.763, "dur": 3.521, + "args": { + "External id": 990501,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941157187.680, "dur": 9.575, + "args": { + "External id": 990502,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941157259.447, "dur": 6.462, + "args": { + "External id": 990503,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941157262.090, "dur": 3.546, + "args": { + "External id": 990504,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941157268.574, "dur": 39.561, + "args": { + "External id": 990505,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941157276.485, "dur": 1.976, + "args": { + "External id": 990506,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941157312.160, "dur": 2.011, + "args": { + "External id": 990507,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941157313.465, "dur": 0.600, + "args": { + "External id": 990508,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941157315.256, "dur": 33.185, + "args": { + "External id": 990509,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941157318.040, "dur": 0.689, + "args": { + "External id": 990510,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941157463.948, "dur": 33.981, + "args": { + "External id": 990511,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941157523.438, "dur": 21.637, + "args": { + "External id": 990512,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941157554.317, "dur": 60.030, + "args": { + "External id": 990513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941157621.935, "dur": 51.324, + "args": { + "External id": 990514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941157682.125, "dur": 26.812, + "args": { + "External id": 990515,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941157717.584, "dur": 40.049, + "args": { + "External id": 990516,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941157766.357, "dur": 31.418, + "args": { + "External id": 990517,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941157806.379, "dur": 35.095, + "args": { + "External id": 990518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941157865.492, "dur": 28.523, + "args": { + "External id": 990519,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941157915.410, "dur": 29.823, + "args": { + "External id": 990520,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941157965.689, "dur": 20.005, + "args": { + "External id": 990521,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941158002.490, "dur": 38.091, + "args": { + "External id": 990522,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941158100.535, "dur": 24.997, + "args": { + "External id": 990523,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158215.435, "dur": 16.988, + "args": { + "External id": 990524,"Record function id": 0, "Ev Idx": 8507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158219.115, "dur": 12.268, + "args": { + "External id": 990525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158224.433, "dur": 5.907, + "args": { + "External id": 990526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158225.860, "dur": 4.314, + "args": { + "External id": 990527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158237.089, "dur": 5.436, + "args": { + "External id": 990528,"Record function id": 0, "Ev Idx": 8511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158238.554, "dur": 3.445, + "args": { + "External id": 990529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158239.462, "dur": 1.953, + "args": { + "External id": 990530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158240.294, "dur": 1.023, + "args": { + "External id": 990531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158246.620, "dur": 5.353, + "args": { + "External id": 990532,"Record function id": 0, "Ev Idx": 8515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158248.165, "dur": 3.269, + "args": { + "External id": 990533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158249.075, "dur": 1.886, + "args": { + "External id": 990534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158249.997, "dur": 0.848, + "args": { + "External id": 990535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158255.902, "dur": 5.682, + "args": { + "External id": 990536,"Record function id": 0, "Ev Idx": 8519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158257.459, "dur": 3.622, + "args": { + "External id": 990537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158258.517, "dur": 1.733, + "args": { + "External id": 990538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158259.417, "dur": 0.730, + "args": { + "External id": 990539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158265.416, "dur": 4.327, + "args": { + "External id": 990540,"Record function id": 0, "Ev Idx": 8523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158266.702, "dur": 2.568, + "args": { + "External id": 990541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158267.430, "dur": 1.140, + "args": { + "External id": 990542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158267.740, "dur": 0.743, + "args": { + "External id": 990543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158273.606, "dur": 7.139, + "args": { + "External id": 990544,"Record function id": 0, "Ev Idx": 8527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158275.069, "dur": 5.188, + "args": { + "External id": 990545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158275.587, "dur": 4.103, + "args": { + "External id": 990546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158276.087, "dur": 3.507, + "args": { + "External id": 990547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158284.705, "dur": 4.830, + "args": { + "External id": 990548,"Record function id": 0, "Ev Idx": 8531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158286.466, "dur": 2.581, + "args": { + "External id": 990549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158287.165, "dur": 1.193, + "args": { + "External id": 990550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158287.526, "dur": 0.759, + "args": { + "External id": 990551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158293.191, "dur": 7.352, + "args": { + "External id": 990552,"Record function id": 0, "Ev Idx": 8535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158297.748, "dur": 2.265, + "args": { + "External id": 990553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158298.374, "dur": 1.088, + "args": { + "External id": 990554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158298.681, "dur": 0.704, + "args": { + "External id": 990555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158304.855, "dur": 7.186, + "args": { + "External id": 990556,"Record function id": 0, "Ev Idx": 8539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941158306.277, "dur": 5.256, + "args": { + "External id": 990557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158306.821, "dur": 3.965, + "args": { + "External id": 990558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941158309.961, "dur": 0.745, + "args": { + "External id": 990559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941158317.554, "dur": 64066.157, + "args": { + "External id": 990560,"Record function id": 0, "Sequence number": 10552510, "Fwd thread id": 1, "Ev Idx": 8543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941158325.487, "dur": 64046.765, + "args": { + "External id": 990561,"Sequence number": 10552510, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8544 + } + }, + { + "ph": "f", "id": 409, "pid": 2338708, "tid": 2379421, "ts": 6345941158325.487, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2338708, "tid": 2379421, + "ts": 6345941158361.471, "dur": 47.215, + "args": { + "External id": 990562,"Record function id": 0, "Ev Idx": 8545 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2338708, "tid": 2379421, + "ts": 6345941158417.838, "dur": 84.329, + "args": { + "External id": 990563,"Record function id": 0, "Ev Idx": 8546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2338708, "tid": 2379421, + "ts": 6345941158509.474, "dur": 63853.826, + "args": { + "External id": 990564,"Record function id": 0, "Ev Idx": 8547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941158610.863, "dur": 8.503, + "args": { + "External id": 990565,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941158631.288, "dur": 5.494, + "args": { + "External id": 990566,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941158652.168, "dur": 62568.902, + "args": { + "External id": 990567,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941158670.145, "dur": 62535.572, + "args": { + "External id": 990568,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941158779.110, "dur": 19.851, + "args": { + "External id": 990569,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941158822.089, "dur": 62328.713, + "args": { + "External id": 990570,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941158827.999, "dur": 62321.579, + "args": { + "External id": 990571,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941158833.456, "dur": 8.985, + "args": { + "External id": 990572,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941158844.692, "dur": 62298.906, + "args": { + "External id": 990573,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941221349.384, "dur": 14.032, + "args": { + "External id": 990574,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941221353.146, "dur": 9.717, + "args": { + "External id": 990575,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941221398.448, "dur": 457.278, + "args": { + "External id": 990576,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941221435.121, "dur": 414.796, + "args": { + "External id": 990577,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8560, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941221448.510, "dur": 394.732, + "args": { + "External id": 990578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941221880.398, "dur": 2.621, + "args": { + "External id": 990579,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8562, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941221950.816, "dur": 7.738, + "args": { + "External id": 990580,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941221972.611, "dur": 61.988, + "args": { + "External id": 990581,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941222051.768, "dur": 41.517, + "args": { + "External id": 990582,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941222102.564, "dur": 19.668, + "args": { + "External id": 990583,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941222131.939, "dur": 1.114, + "args": { + "External id": 990584,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941222138.412, "dur": 13.288, + "args": { + "External id": 990585,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941222157.786, "dur": 0.913, + "args": { + "External id": 990586,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941222163.617, "dur": 10.989, + "args": { + "External id": 990587,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941222180.805, "dur": 0.863, + "args": { + "External id": 990588,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941222185.616, "dur": 12.687, + "args": { + "External id": 990589,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941222203.336, "dur": 1.572, + "args": { + "External id": 990590,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941222209.983, "dur": 11.578, + "args": { + "External id": 990591,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941222229.833, "dur": 1.118, + "args": { + "External id": 990592,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941222235.584, "dur": 11.950, + "args": { + "External id": 990593,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941222252.550, "dur": 1.113, + "args": { + "External id": 990594,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941222258.040, "dur": 11.199, + "args": { + "External id": 990595,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941222274.366, "dur": 0.995, + "args": { + "External id": 990596,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941222279.956, "dur": 11.395, + "args": { + "External id": 990597,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941222403.379, "dur": 3430.187, + "args": { + "External id": 990598,"Record function id": 0, "Ev Idx": 8581 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2338708, "tid": 2379421, + "ts": 6345941222430.816, "dur": 1237.626, + "args": { + "External id": 990599,"Record function id": 0, "Ev Idx": 8582 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338708, "tid": 2379421, + "ts": 6345941222450.757, "dur": 369.085, + "args": { + "External id": 990600,"Record function id": 0, "Ev Idx": 8583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941222553.516, "dur": 7.482, + "args": { + "External id": 990601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941222565.204, "dur": 1.055, + "args": { + "External id": 990602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941222568.567, "dur": 0.914, + "args": { + "External id": 990603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941222571.980, "dur": 0.957, + "args": { + "External id": 990604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941222574.727, "dur": 1.058, + "args": { + "External id": 990605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941222577.592, "dur": 0.919, + "args": { + "External id": 990606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941222580.307, "dur": 1.062, + "args": { + "External id": 990607,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941222585.464, "dur": 1.638, + "args": { + "External id": 990608,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941222588.763, "dur": 3.538, + "args": { + "External id": 990609,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941222594.158, "dur": 0.851, + "args": { + "External id": 990610,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941222616.304, "dur": 169.392, + "args": { + "External id": 990611,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941222635.868, "dur": 144.401, + "args": { + "External id": 990612,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941222653.486, "dur": 16.028, + "args": { + "External id": 990613,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941222676.134, "dur": 74.803, + "args": { + "External id": 990614,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941222679.343, "dur": 71.191, + "args": { + "External id": 990615,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941222684.182, "dur": 6.503, + "args": { + "External id": 990616,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941222692.736, "dur": 57.008, + "args": { + "External id": 990617,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8600 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2338708, "tid": 2379421, + "ts": 6345941222921.255, "dur": 738.718, + "args": { + "External id": 990618,"Record function id": 0, "Ev Idx": 8601 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338708, "tid": 2379421, + "ts": 6345941222939.845, "dur": 705.281, + "args": { + "External id": 990619,"Record function id": 0, "Ev Idx": 8602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941223029.079, "dur": 7.712, + "args": { + "External id": 990620,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941223096.968, "dur": 35.996, + "args": { + "External id": 990621,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223103.599, "dur": 2.235, + "args": { + "External id": 990622,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223108.302, "dur": 1.261, + "args": { + "External id": 990623,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223111.988, "dur": 0.924, + "args": { + "External id": 990624,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223114.670, "dur": 2.665, + "args": { + "External id": 990625,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223118.918, "dur": 0.827, + "args": { + "External id": 990626,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223122.686, "dur": 0.469, + "args": { + "External id": 990627,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223124.678, "dur": 0.445, + "args": { + "External id": 990628,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223126.509, "dur": 0.396, + "args": { + "External id": 990629,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223128.911, "dur": 0.425, + "args": { + "External id": 990630,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941223148.254, "dur": 51.535, + "args": { + "External id": 990631,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941223242.470, "dur": 137.723, + "args": { + "External id": 990632,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941223256.547, "dur": 6.200, + "args": { + "External id": 990633,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941223270.700, "dur": 13.576, + "args": { + "External id": 990634,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941223276.476, "dur": 7.328, + "args": { + "External id": 990635,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223280.509, "dur": 1.834, + "args": { + "External id": 990636,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941223292.725, "dur": 31.726, + "args": { + "External id": 990637,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223295.449, "dur": 2.639, + "args": { + "External id": 990638,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223299.757, "dur": 0.676, + "args": { + "External id": 990639,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223302.763, "dur": 0.783, + "args": { + "External id": 990640,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223305.284, "dur": 0.571, + "args": { + "External id": 990641,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223307.691, "dur": 0.603, + "args": { + "External id": 990642,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223310.208, "dur": 0.401, + "args": { + "External id": 990643,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223312.429, "dur": 0.359, + "args": { + "External id": 990644,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223314.405, "dur": 0.634, + "args": { + "External id": 990645,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941223317.311, "dur": 2.796, + "args": { + "External id": 990646,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941223336.764, "dur": 34.679, + "args": { + "External id": 990647,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941223430.876, "dur": 132.981, + "args": { + "External id": 990648,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941223460.947, "dur": 98.938, + "args": { + "External id": 990649,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8632, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941223471.596, "dur": 83.476, + "args": { + "External id": 990650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941223583.282, "dur": 2.144, + "args": { + "External id": 990651,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8634, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941223677.341, "dur": 2132.932, + "args": { + "External id": 990652,"Sequence number": 10552509, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8635 + } + }, + { + "ph": "f", "id": 410, "pid": 2338708, "tid": 2379421, "ts": 6345941223677.341, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941223805.967, "dur": 127.531, + "args": { + "External id": 990653,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941223983.266, "dur": 113.576, + "args": { + "External id": 990654,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941224126.624, "dur": 74.441, + "args": { + "External id": 990655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941224213.580, "dur": 38.106, + "args": { + "External id": 990656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941224259.139, "dur": 39.192, + "args": { + "External id": 990657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941224306.525, "dur": 33.061, + "args": { + "External id": 990658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941224350.446, "dur": 36.086, + "args": { + "External id": 990659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941224418.573, "dur": 30.083, + "args": { + "External id": 990660,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941224471.206, "dur": 35.549, + "args": { + "External id": 990661,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941224533.508, "dur": 23.874, + "args": { + "External id": 990662,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941224575.001, "dur": 18.744, + "args": { + "External id": 990663,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941224606.598, "dur": 45.901, + "args": { + "External id": 990664,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941224657.376, "dur": 38.766, + "args": { + "External id": 990665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941224730.043, "dur": 406.005, + "args": { + "External id": 990666,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941224841.666, "dur": 10.109, + "args": { + "External id": 990667,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941224854.649, "dur": 3.588, + "args": { + "External id": 990668,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941224859.801, "dur": 2.540, + "args": { + "External id": 990669,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941224863.538, "dur": 5.383, + "args": { + "External id": 990670,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941224926.632, "dur": 6.307, + "args": { + "External id": 990671,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941224929.241, "dur": 3.485, + "args": { + "External id": 990672,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941224935.418, "dur": 43.025, + "args": { + "External id": 990673,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941224946.947, "dur": 1.955, + "args": { + "External id": 990674,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941224980.126, "dur": 2.402, + "args": { + "External id": 990675,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941224981.647, "dur": 0.782, + "args": { + "External id": 990676,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941224983.908, "dur": 44.620, + "args": { + "External id": 990677,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941224986.934, "dur": 0.842, + "args": { + "External id": 990678,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941225183.362, "dur": 38.133, + "args": { + "External id": 990679,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941225241.845, "dur": 22.481, + "args": { + "External id": 990680,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941225274.796, "dur": 67.921, + "args": { + "External id": 990681,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941225351.512, "dur": 45.178, + "args": { + "External id": 990682,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941225407.047, "dur": 25.129, + "args": { + "External id": 990683,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941225441.958, "dur": 36.833, + "args": { + "External id": 990684,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941225488.274, "dur": 32.314, + "args": { + "External id": 990685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941225530.525, "dur": 38.954, + "args": { + "External id": 990686,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941225590.316, "dur": 31.047, + "args": { + "External id": 990687,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941225639.495, "dur": 28.155, + "args": { + "External id": 990688,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941225684.284, "dur": 22.629, + "args": { + "External id": 990689,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941225722.689, "dur": 15.121, + "args": { + "External id": 990690,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941225751.710, "dur": 20.780, + "args": { + "External id": 990691,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225860.204, "dur": 17.836, + "args": { + "External id": 990692,"Record function id": 0, "Ev Idx": 8675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225864.445, "dur": 12.594, + "args": { + "External id": 990693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225869.097, "dur": 6.789, + "args": { + "External id": 990694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225871.097, "dur": 4.650, + "args": { + "External id": 990695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225882.632, "dur": 6.478, + "args": { + "External id": 990696,"Record function id": 0, "Ev Idx": 8679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225884.158, "dur": 4.379, + "args": { + "External id": 990697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225885.322, "dur": 2.526, + "args": { + "External id": 990698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225886.524, "dur": 1.244, + "args": { + "External id": 990699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225893.114, "dur": 5.493, + "args": { + "External id": 990700,"Record function id": 0, "Ev Idx": 8683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225894.696, "dur": 3.435, + "args": { + "External id": 990701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225895.529, "dur": 1.991, + "args": { + "External id": 990702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225896.016, "dur": 1.401, + "args": { + "External id": 990703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225902.318, "dur": 4.681, + "args": { + "External id": 990704,"Record function id": 0, "Ev Idx": 8687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225903.750, "dur": 2.768, + "args": { + "External id": 990705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225904.516, "dur": 1.480, + "args": { + "External id": 990706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225905.125, "dur": 0.793, + "args": { + "External id": 990707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225910.732, "dur": 17.748, + "args": { + "External id": 990708,"Record function id": 0, "Ev Idx": 8691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225924.836, "dur": 3.141, + "args": { + "External id": 990709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225925.724, "dur": 1.561, + "args": { + "External id": 990710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225926.269, "dur": 0.928, + "args": { + "External id": 990711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225932.424, "dur": 9.429, + "args": { + "External id": 990712,"Record function id": 0, "Ev Idx": 8695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225933.866, "dur": 7.450, + "args": { + "External id": 990713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225934.512, "dur": 6.135, + "args": { + "External id": 990714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225937.301, "dur": 3.216, + "args": { + "External id": 990715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225945.821, "dur": 4.154, + "args": { + "External id": 990716,"Record function id": 0, "Ev Idx": 8699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225946.903, "dur": 2.592, + "args": { + "External id": 990717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225947.542, "dur": 1.326, + "args": { + "External id": 990718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225947.916, "dur": 0.876, + "args": { + "External id": 990719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225953.592, "dur": 42.400, + "args": { + "External id": 990720,"Record function id": 0, "Ev Idx": 8703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941225992.498, "dur": 2.981, + "args": { + "External id": 990721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225993.381, "dur": 1.508, + "args": { + "External id": 990722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941225993.903, "dur": 0.888, + "args": { + "External id": 990723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941226006.616, "dur": 29.861, + "args": { + "External id": 990724,"Record function id": 0, "Ev Idx": 8707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941226027.610, "dur": 7.682, + "args": { + "External id": 990725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941226029.215, "dur": 5.035, + "args": { + "External id": 990726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941226032.289, "dur": 1.623, + "args": { + "External id": 990727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941226044.077, "dur": 61343.868, + "args": { + "External id": 990728,"Record function id": 0, "Sequence number": 10552508, "Fwd thread id": 1, "Ev Idx": 8711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941226046.063, "dur": 61330.702, + "args": { + "External id": 990729,"Sequence number": 10552508, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8712 + } + }, + { + "ph": "f", "id": 411, "pid": 2338708, "tid": 2379421, "ts": 6345941226046.063, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2338708, "tid": 2379421, + "ts": 6345941226133.772, "dur": 45.747, + "args": { + "External id": 990730,"Record function id": 0, "Ev Idx": 8713 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2338708, "tid": 2379421, + "ts": 6345941226189.435, "dur": 82.866, + "args": { + "External id": 990731,"Record function id": 0, "Ev Idx": 8714 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2338708, "tid": 2379421, + "ts": 6345941226280.084, "dur": 61086.919, + "args": { + "External id": 990732,"Record function id": 0, "Ev Idx": 8715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941226394.728, "dur": 8.724, + "args": { + "External id": 990733,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941226414.939, "dur": 5.863, + "args": { + "External id": 990734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941226448.158, "dur": 59823.392, + "args": { + "External id": 990735,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941226468.420, "dur": 59788.310, + "args": { + "External id": 990736,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941226589.022, "dur": 21.371, + "args": { + "External id": 990737,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941226635.944, "dur": 59574.420, + "args": { + "External id": 990738,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941226640.001, "dur": 59569.456, + "args": { + "External id": 990739,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941226645.597, "dur": 10.306, + "args": { + "External id": 990740,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941226657.975, "dur": 59547.781, + "args": { + "External id": 990741,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941286398.394, "dur": 13.316, + "args": { + "External id": 990742,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941286401.947, "dur": 9.058, + "args": { + "External id": 990743,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941286448.238, "dur": 409.781, + "args": { + "External id": 990744,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941286484.869, "dur": 367.330, + "args": { + "External id": 990745,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8728, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941286497.819, "dur": 346.578, + "args": { + "External id": 990746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941286881.050, "dur": 2.631, + "args": { + "External id": 990747,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8730, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941286951.846, "dur": 7.316, + "args": { + "External id": 990748,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941286973.482, "dur": 58.565, + "args": { + "External id": 990749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941287049.237, "dur": 40.753, + "args": { + "External id": 990750,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941287100.239, "dur": 19.752, + "args": { + "External id": 990751,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941287129.096, "dur": 1.295, + "args": { + "External id": 990752,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941287136.767, "dur": 13.173, + "args": { + "External id": 990753,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941287156.181, "dur": 1.039, + "args": { + "External id": 990754,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941287162.566, "dur": 11.591, + "args": { + "External id": 990755,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941287179.915, "dur": 1.056, + "args": { + "External id": 990756,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941287186.185, "dur": 12.239, + "args": { + "External id": 990757,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941287203.852, "dur": 1.384, + "args": { + "External id": 990758,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941287210.398, "dur": 11.936, + "args": { + "External id": 990759,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941287227.518, "dur": 1.016, + "args": { + "External id": 990760,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941287233.545, "dur": 12.347, + "args": { + "External id": 990761,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941287251.210, "dur": 1.281, + "args": { + "External id": 990762,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941287258.104, "dur": 11.619, + "args": { + "External id": 990763,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941287277.882, "dur": 0.989, + "args": { + "External id": 990764,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941287283.883, "dur": 11.992, + "args": { + "External id": 990765,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941287407.405, "dur": 3375.820, + "args": { + "External id": 990766,"Record function id": 0, "Ev Idx": 8749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2338708, "tid": 2379421, + "ts": 6345941287432.157, "dur": 1240.532, + "args": { + "External id": 990767,"Record function id": 0, "Ev Idx": 8750 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338708, "tid": 2379421, + "ts": 6345941287451.832, "dur": 372.268, + "args": { + "External id": 990768,"Record function id": 0, "Ev Idx": 8751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941287555.083, "dur": 6.884, + "args": { + "External id": 990769,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941287566.007, "dur": 1.425, + "args": { + "External id": 990770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941287570.044, "dur": 1.147, + "args": { + "External id": 990771,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941287573.126, "dur": 1.006, + "args": { + "External id": 990772,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941287581.104, "dur": 1.037, + "args": { + "External id": 990773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941287583.833, "dur": 0.866, + "args": { + "External id": 990774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941287586.729, "dur": 1.165, + "args": { + "External id": 990775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941287592.159, "dur": 1.907, + "args": { + "External id": 990776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941287595.896, "dur": 2.922, + "args": { + "External id": 990777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941287600.451, "dur": 1.013, + "args": { + "External id": 990778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941287622.271, "dur": 167.227, + "args": { + "External id": 990779,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941287640.725, "dur": 142.935, + "args": { + "External id": 990780,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941287659.696, "dur": 17.815, + "args": { + "External id": 990781,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941287681.862, "dur": 74.691, + "args": { + "External id": 990782,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941287684.940, "dur": 71.179, + "args": { + "External id": 990783,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941287689.551, "dur": 7.471, + "args": { + "External id": 990784,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941287699.013, "dur": 56.480, + "args": { + "External id": 990785,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8768 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2338708, "tid": 2379421, + "ts": 6345941287930.700, "dur": 732.753, + "args": { + "External id": 990786,"Record function id": 0, "Ev Idx": 8769 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338708, "tid": 2379421, + "ts": 6345941287950.687, "dur": 698.428, + "args": { + "External id": 990787,"Record function id": 0, "Ev Idx": 8770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941288041.798, "dur": 9.966, + "args": { + "External id": 990788,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941288110.621, "dur": 33.441, + "args": { + "External id": 990789,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288117.165, "dur": 2.054, + "args": { + "External id": 990790,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288121.574, "dur": 0.398, + "args": { + "External id": 990791,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288123.226, "dur": 0.713, + "args": { + "External id": 990792,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288126.007, "dur": 3.128, + "args": { + "External id": 990793,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288131.101, "dur": 0.408, + "args": { + "External id": 990794,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288132.627, "dur": 0.545, + "args": { + "External id": 990795,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288135.167, "dur": 0.538, + "args": { + "External id": 990796,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288137.702, "dur": 0.419, + "args": { + "External id": 990797,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288139.477, "dur": 0.391, + "args": { + "External id": 990798,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941288157.213, "dur": 52.746, + "args": { + "External id": 990799,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941288249.414, "dur": 130.328, + "args": { + "External id": 990800,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941288263.165, "dur": 4.801, + "args": { + "External id": 990801,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941288274.621, "dur": 11.655, + "args": { + "External id": 990802,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941288279.646, "dur": 6.159, + "args": { + "External id": 990803,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288283.721, "dur": 0.907, + "args": { + "External id": 990804,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941288294.524, "dur": 29.264, + "args": { + "External id": 990805,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288296.599, "dur": 2.981, + "args": { + "External id": 990806,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288300.922, "dur": 0.710, + "args": { + "External id": 990807,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288303.895, "dur": 0.426, + "args": { + "External id": 990808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288306.248, "dur": 0.427, + "args": { + "External id": 990809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288308.107, "dur": 0.481, + "args": { + "External id": 990810,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288310.477, "dur": 0.402, + "args": { + "External id": 990811,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288312.620, "dur": 0.467, + "args": { + "External id": 990812,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288314.486, "dur": 0.482, + "args": { + "External id": 990813,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941288316.875, "dur": 2.625, + "args": { + "External id": 990814,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941288335.603, "dur": 35.479, + "args": { + "External id": 990815,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941288434.242, "dur": 133.539, + "args": { + "External id": 990816,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941288465.642, "dur": 98.167, + "args": { + "External id": 990817,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8800, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941288476.847, "dur": 81.931, + "args": { + "External id": 990818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941288588.008, "dur": 2.130, + "args": { + "External id": 990819,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8802, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941288681.037, "dur": 2077.921, + "args": { + "External id": 990820,"Sequence number": 10552507, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8803 + } + }, + { + "ph": "f", "id": 412, "pid": 2338708, "tid": 2379421, "ts": 6345941288681.037, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941288809.841, "dur": 119.410, + "args": { + "External id": 990821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941288976.352, "dur": 71.060, + "args": { + "External id": 990822,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941289118.149, "dur": 67.038, + "args": { + "External id": 990823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941289197.552, "dur": 36.778, + "args": { + "External id": 990824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941289242.088, "dur": 36.579, + "args": { + "External id": 990825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941289286.585, "dur": 34.124, + "args": { + "External id": 990826,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941289331.722, "dur": 32.927, + "args": { + "External id": 990827,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941289397.425, "dur": 28.953, + "args": { + "External id": 990828,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941289449.974, "dur": 33.727, + "args": { + "External id": 990829,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941289509.802, "dur": 23.491, + "args": { + "External id": 990830,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941289550.296, "dur": 17.383, + "args": { + "External id": 990831,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941289580.619, "dur": 44.779, + "args": { + "External id": 990832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941289630.628, "dur": 40.278, + "args": { + "External id": 990833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941289705.906, "dur": 395.769, + "args": { + "External id": 990834,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941289804.026, "dur": 7.646, + "args": { + "External id": 990835,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941289814.262, "dur": 3.324, + "args": { + "External id": 990836,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941289819.221, "dur": 2.518, + "args": { + "External id": 990837,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941289823.768, "dur": 4.851, + "args": { + "External id": 990838,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941289902.480, "dur": 7.140, + "args": { + "External id": 990839,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941289904.879, "dur": 3.893, + "args": { + "External id": 990840,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941289912.347, "dur": 40.325, + "args": { + "External id": 990841,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941289919.710, "dur": 2.047, + "args": { + "External id": 990842,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941289954.498, "dur": 1.917, + "args": { + "External id": 990843,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941289955.715, "dur": 0.595, + "args": { + "External id": 990844,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941289958.000, "dur": 20.706, + "args": { + "External id": 990845,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941289960.470, "dur": 0.636, + "args": { + "External id": 990846,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941290153.375, "dur": 37.463, + "args": { + "External id": 990847,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941290210.334, "dur": 20.000, + "args": { + "External id": 990848,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941290241.038, "dur": 60.189, + "args": { + "External id": 990849,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941290309.621, "dur": 45.953, + "args": { + "External id": 990850,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941290365.912, "dur": 24.679, + "args": { + "External id": 990851,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 8834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941290400.652, "dur": 36.800, + "args": { + "External id": 990852,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 8835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941290446.401, "dur": 31.917, + "args": { + "External id": 990853,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941290487.282, "dur": 34.716, + "args": { + "External id": 990854,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941290543.425, "dur": 28.840, + "args": { + "External id": 990855,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 8838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941290591.168, "dur": 28.913, + "args": { + "External id": 990856,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941290637.020, "dur": 20.080, + "args": { + "External id": 990857,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941290672.276, "dur": 15.768, + "args": { + "External id": 990858,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941290702.149, "dur": 18.489, + "args": { + "External id": 990859,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 8842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290808.272, "dur": 28.696, + "args": { + "External id": 990860,"Record function id": 0, "Ev Idx": 8843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290812.020, "dur": 23.686, + "args": { + "External id": 990861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290827.757, "dur": 6.879, + "args": { + "External id": 990862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290830.136, "dur": 4.342, + "args": { + "External id": 990863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290842.014, "dur": 6.111, + "args": { + "External id": 990864,"Record function id": 0, "Ev Idx": 8847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290843.654, "dur": 3.942, + "args": { + "External id": 990865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290844.598, "dur": 2.384, + "args": { + "External id": 990866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290845.557, "dur": 1.325, + "args": { + "External id": 990867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290852.204, "dur": 5.429, + "args": { + "External id": 990868,"Record function id": 0, "Ev Idx": 8851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290853.864, "dur": 3.291, + "args": { + "External id": 990869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290854.485, "dur": 1.903, + "args": { + "External id": 990870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290855.101, "dur": 1.206, + "args": { + "External id": 990871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 8854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290861.431, "dur": 4.061, + "args": { + "External id": 990872,"Record function id": 0, "Ev Idx": 8855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290862.731, "dur": 2.245, + "args": { + "External id": 990873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290863.299, "dur": 1.199, + "args": { + "External id": 990874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290863.667, "dur": 0.749, + "args": { + "External id": 990875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 8858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290869.198, "dur": 4.221, + "args": { + "External id": 990876,"Record function id": 0, "Ev Idx": 8859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290870.628, "dur": 2.290, + "args": { + "External id": 990877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290871.216, "dur": 1.080, + "args": { + "External id": 990878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290871.529, "dur": 0.689, + "args": { + "External id": 990879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290877.185, "dur": 6.718, + "args": { + "External id": 990880,"Record function id": 0, "Ev Idx": 8863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290878.438, "dur": 4.962, + "args": { + "External id": 990881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290879.009, "dur": 3.777, + "args": { + "External id": 990882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290879.490, "dur": 3.184, + "args": { + "External id": 990883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 8866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290887.678, "dur": 4.356, + "args": { + "External id": 990884,"Record function id": 0, "Ev Idx": 8867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290888.830, "dur": 2.728, + "args": { + "External id": 990885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290889.461, "dur": 1.647, + "args": { + "External id": 990886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290889.799, "dur": 1.229, + "args": { + "External id": 990887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290895.688, "dur": 6.229, + "args": { + "External id": 990888,"Record function id": 0, "Ev Idx": 8871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290897.007, "dur": 4.432, + "args": { + "External id": 990889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290897.576, "dur": 3.148, + "args": { + "External id": 990890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290899.939, "dur": 0.664, + "args": { + "External id": 990891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 8874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290906.103, "dur": 4.225, + "args": { + "External id": 990892,"Record function id": 0, "Ev Idx": 8875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941290907.212, "dur": 2.651, + "args": { + "External id": 990893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290908.022, "dur": 1.142, + "args": { + "External id": 990894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941290908.360, "dur": 0.728, + "args": { + "External id": 990895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 8878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941290915.929, "dur": 63111.182, + "args": { + "External id": 990896,"Record function id": 0, "Sequence number": 10552506, "Fwd thread id": 1, "Ev Idx": 8879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941290917.535, "dur": 63080.294, + "args": { + "External id": 990897,"Sequence number": 10552506, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8880 + } + }, + { + "ph": "f", "id": 413, "pid": 2338708, "tid": 2379421, "ts": 6345941290917.535, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2338708, "tid": 2379421, + "ts": 6345941290952.679, "dur": 45.155, + "args": { + "External id": 990898,"Record function id": 0, "Ev Idx": 8881 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2338708, "tid": 2379421, + "ts": 6345941291006.788, "dur": 149.431, + "args": { + "External id": 990899,"Record function id": 0, "Ev Idx": 8882 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2338708, "tid": 2379421, + "ts": 6345941291166.063, "dur": 62823.033, + "args": { + "External id": 990900,"Record function id": 0, "Ev Idx": 8883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941291275.258, "dur": 8.840, + "args": { + "External id": 990901,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941291296.444, "dur": 5.734, + "args": { + "External id": 990902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941291318.267, "dur": 61561.457, + "args": { + "External id": 990903,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941291334.799, "dur": 61530.210, + "args": { + "External id": 990904,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 8887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941291443.458, "dur": 20.240, + "args": { + "External id": 990905,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941291490.591, "dur": 61327.043, + "args": { + "External id": 990906,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 8889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941291497.271, "dur": 61319.275, + "args": { + "External id": 990907,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 8890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941291507.824, "dur": 9.784, + "args": { + "External id": 990908,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941291520.184, "dur": 61290.746, + "args": { + "External id": 990909,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 8892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941353004.689, "dur": 28.302, + "args": { + "External id": 990910,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 8893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941353020.898, "dur": 11.245, + "args": { + "External id": 990911,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941353099.122, "dur": 454.228, + "args": { + "External id": 990912,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 8895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941353134.778, "dur": 412.750, + "args": { + "External id": 990913,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8896, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941353148.777, "dur": 392.377, + "args": { + "External id": 990914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 8897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941353578.923, "dur": 2.594, + "args": { + "External id": 990915,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8898, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941353657.110, "dur": 7.834, + "args": { + "External id": 990916,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941353679.550, "dur": 39.407, + "args": { + "External id": 990917,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941353730.911, "dur": 4.088, + "args": { + "External id": 990918,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941353741.745, "dur": 13.764, + "args": { + "External id": 990919,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941353762.602, "dur": 0.977, + "args": { + "External id": 990920,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941353768.790, "dur": 12.880, + "args": { + "External id": 990921,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941353786.686, "dur": 1.036, + "args": { + "External id": 990922,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941353792.752, "dur": 11.809, + "args": { + "External id": 990923,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 8906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941353809.843, "dur": 0.923, + "args": { + "External id": 990924,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941353815.764, "dur": 12.630, + "args": { + "External id": 990925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 8908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941353833.172, "dur": 1.183, + "args": { + "External id": 990926,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941353838.919, "dur": 11.385, + "args": { + "External id": 990927,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 8910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941353855.735, "dur": 0.913, + "args": { + "External id": 990928,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941353861.477, "dur": 12.415, + "args": { + "External id": 990929,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941353879.022, "dur": 0.923, + "args": { + "External id": 990930,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941353884.447, "dur": 10.797, + "args": { + "External id": 990931,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 8914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941353902.710, "dur": 0.800, + "args": { + "External id": 990932,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941353907.882, "dur": 11.704, + "args": { + "External id": 990933,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 8916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941354048.610, "dur": 3458.302, + "args": { + "External id": 990934,"Record function id": 0, "Ev Idx": 8917 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2338708, "tid": 2379421, + "ts": 6345941354111.980, "dur": 1278.868, + "args": { + "External id": 990935,"Record function id": 0, "Ev Idx": 8918 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338708, "tid": 2379421, + "ts": 6345941354130.681, "dur": 397.790, + "args": { + "External id": 990936,"Record function id": 0, "Ev Idx": 8919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941354238.154, "dur": 8.963, + "args": { + "External id": 990937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 8920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941354252.109, "dur": 1.059, + "args": { + "External id": 990938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941354255.417, "dur": 1.208, + "args": { + "External id": 990939,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941354258.947, "dur": 0.882, + "args": { + "External id": 990940,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941354262.020, "dur": 1.033, + "args": { + "External id": 990941,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 8924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941354264.737, "dur": 0.922, + "args": { + "External id": 990942,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 8925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941354267.227, "dur": 0.671, + "args": { + "External id": 990943,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 8926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941354271.723, "dur": 2.279, + "args": { + "External id": 990944,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941354275.417, "dur": 3.197, + "args": { + "External id": 990945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941354279.937, "dur": 0.674, + "args": { + "External id": 990946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 8929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941354301.093, "dur": 186.186, + "args": { + "External id": 990947,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941354323.310, "dur": 157.479, + "args": { + "External id": 990948,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 8931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941354344.338, "dur": 17.958, + "args": { + "External id": 990949,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941354367.638, "dur": 81.208, + "args": { + "External id": 990950,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 8933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941354370.783, "dur": 77.581, + "args": { + "External id": 990951,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 8934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354375.479, "dur": 6.047, + "args": { + "External id": 990952,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941354383.567, "dur": 64.323, + "args": { + "External id": 990953,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 8936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2338708, "tid": 2379421, + "ts": 6345941354632.317, "dur": 749.828, + "args": { + "External id": 990954,"Record function id": 0, "Ev Idx": 8937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338708, "tid": 2379421, + "ts": 6345941354652.898, "dur": 714.371, + "args": { + "External id": 990955,"Record function id": 0, "Ev Idx": 8938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941354724.251, "dur": 6.840, + "args": { + "External id": 990956,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941354751.360, "dur": 33.528, + "args": { + "External id": 990957,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354757.164, "dur": 2.407, + "args": { + "External id": 990958,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354762.858, "dur": 0.606, + "args": { + "External id": 990959,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354764.912, "dur": 0.549, + "args": { + "External id": 990960,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354767.205, "dur": 2.882, + "args": { + "External id": 990961,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354771.912, "dur": 0.436, + "args": { + "External id": 990962,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354773.913, "dur": 0.548, + "args": { + "External id": 990963,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354776.777, "dur": 0.543, + "args": { + "External id": 990964,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354779.013, "dur": 0.413, + "args": { + "External id": 990965,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354780.958, "dur": 0.413, + "args": { + "External id": 990966,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941354797.218, "dur": 50.828, + "args": { + "External id": 990967,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941354889.395, "dur": 154.324, + "args": { + "External id": 990968,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 8951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941354901.739, "dur": 3.669, + "args": { + "External id": 990969,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941354911.479, "dur": 13.214, + "args": { + "External id": 990970,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941354917.646, "dur": 6.459, + "args": { + "External id": 990971,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 8954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354922.151, "dur": 0.654, + "args": { + "External id": 990972,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 8955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941354932.749, "dur": 29.063, + "args": { + "External id": 990973,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 8956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354934.962, "dur": 2.933, + "args": { + "External id": 990974,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354939.247, "dur": 0.619, + "args": { + "External id": 990975,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354941.939, "dur": 0.786, + "args": { + "External id": 990976,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354944.504, "dur": 0.407, + "args": { + "External id": 990977,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354946.099, "dur": 0.553, + "args": { + "External id": 990978,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354948.514, "dur": 0.490, + "args": { + "External id": 990979,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354950.965, "dur": 0.362, + "args": { + "External id": 990980,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354952.659, "dur": 0.550, + "args": { + "External id": 990981,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941354954.870, "dur": 2.517, + "args": { + "External id": 990982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 8965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941354972.746, "dur": 58.715, + "args": { + "External id": 990983,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 8966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941355142.990, "dur": 140.777, + "args": { + "External id": 990984,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 8967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941355176.215, "dur": 103.433, + "args": { + "External id": 990985,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 8968, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941355187.546, "dur": 87.320, + "args": { + "External id": 990986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 8969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941355302.912, "dur": 2.241, + "args": { + "External id": 990987,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 8970, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941355399.650, "dur": 2083.740, + "args": { + "External id": 990988,"Sequence number": 10552505, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 8971 + } + }, + { + "ph": "f", "id": 414, "pid": 2338708, "tid": 2379421, "ts": 6345941355399.650, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941355529.480, "dur": 120.786, + "args": { + "External id": 990989,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 8972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941355697.578, "dur": 44.265, + "args": { + "External id": 990990,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 8973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941355763.932, "dur": 54.553, + "args": { + "External id": 990991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 8974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941355830.632, "dur": 37.008, + "args": { + "External id": 990992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941355875.261, "dur": 36.275, + "args": { + "External id": 990993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941355919.163, "dur": 32.706, + "args": { + "External id": 990994,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 8977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941355962.636, "dur": 34.598, + "args": { + "External id": 990995,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 8978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941356050.254, "dur": 72.362, + "args": { + "External id": 990996,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 8979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941356150.285, "dur": 35.003, + "args": { + "External id": 990997,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941356214.598, "dur": 24.454, + "args": { + "External id": 990998,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 8981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941356257.763, "dur": 19.809, + "args": { + "External id": 990999,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 8982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941356291.496, "dur": 54.032, + "args": { + "External id": 991000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 8983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941356349.905, "dur": 39.699, + "args": { + "External id": 991001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 8984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941356423.413, "dur": 321.413, + "args": { + "External id": 991002,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 8985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941356537.509, "dur": 8.453, + "args": { + "External id": 991003,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941356548.765, "dur": 4.003, + "args": { + "External id": 991004,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941356554.245, "dur": 2.333, + "args": { + "External id": 991005,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941356558.197, "dur": 4.480, + "args": { + "External id": 991006,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941356621.454, "dur": 5.928, + "args": { + "External id": 991007,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941356623.660, "dur": 3.510, + "args": { + "External id": 991008,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941356633.209, "dur": 38.009, + "args": { + "External id": 991009,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941356639.370, "dur": 2.050, + "args": { + "External id": 991010,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941356673.033, "dur": 2.462, + "args": { + "External id": 991011,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941356674.457, "dur": 0.939, + "args": { + "External id": 991012,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 8995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941356677.010, "dur": 19.224, + "args": { + "External id": 991013,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 8996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941356679.164, "dur": 0.845, + "args": { + "External id": 991014,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 8997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941356792.583, "dur": 34.388, + "args": { + "External id": 991015,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941356846.627, "dur": 19.043, + "args": { + "External id": 991016,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 8999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941356875.577, "dur": 48.272, + "args": { + "External id": 991017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941356931.826, "dur": 46.086, + "args": { + "External id": 991018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941356988.333, "dur": 51.236, + "args": { + "External id": 991019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941357088.098, "dur": 46.349, + "args": { + "External id": 991020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941357147.370, "dur": 32.819, + "args": { + "External id": 991021,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941357189.784, "dur": 35.169, + "args": { + "External id": 991022,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941357248.362, "dur": 28.222, + "args": { + "External id": 991023,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941357294.841, "dur": 30.825, + "args": { + "External id": 991024,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941357352.372, "dur": 23.847, + "args": { + "External id": 991025,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941357394.779, "dur": 15.801, + "args": { + "External id": 991026,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941357427.059, "dur": 19.030, + "args": { + "External id": 991027,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357533.267, "dur": 17.319, + "args": { + "External id": 991028,"Record function id": 0, "Ev Idx": 9011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357536.782, "dur": 12.820, + "args": { + "External id": 991029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357541.594, "dur": 6.857, + "args": { + "External id": 991030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357543.819, "dur": 4.497, + "args": { + "External id": 991031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357555.234, "dur": 5.981, + "args": { + "External id": 991032,"Record function id": 0, "Ev Idx": 9015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357556.658, "dur": 3.982, + "args": { + "External id": 991033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357557.495, "dur": 2.350, + "args": { + "External id": 991034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357558.376, "dur": 1.357, + "args": { + "External id": 991035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357564.993, "dur": 4.832, + "args": { + "External id": 991036,"Record function id": 0, "Ev Idx": 9019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357566.120, "dur": 3.213, + "args": { + "External id": 991037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357566.948, "dur": 1.740, + "args": { + "External id": 991038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357567.485, "dur": 1.127, + "args": { + "External id": 991039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357573.625, "dur": 4.495, + "args": { + "External id": 991040,"Record function id": 0, "Ev Idx": 9023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357574.873, "dur": 2.766, + "args": { + "External id": 991041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357575.705, "dur": 1.298, + "args": { + "External id": 991042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357576.190, "dur": 0.735, + "args": { + "External id": 991043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357581.786, "dur": 4.474, + "args": { + "External id": 991044,"Record function id": 0, "Ev Idx": 9027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357583.142, "dur": 2.622, + "args": { + "External id": 991045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357583.715, "dur": 1.370, + "args": { + "External id": 991046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357584.225, "dur": 0.782, + "args": { + "External id": 991047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357589.938, "dur": 7.027, + "args": { + "External id": 991048,"Record function id": 0, "Ev Idx": 9031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357591.151, "dur": 5.311, + "args": { + "External id": 991049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357591.745, "dur": 4.169, + "args": { + "External id": 991050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357592.389, "dur": 3.374, + "args": { + "External id": 991051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357600.728, "dur": 4.295, + "args": { + "External id": 991052,"Record function id": 0, "Ev Idx": 9035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357601.906, "dur": 2.628, + "args": { + "External id": 991053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357602.442, "dur": 1.426, + "args": { + "External id": 991054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357602.917, "dur": 0.874, + "args": { + "External id": 991055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357609.065, "dur": 4.280, + "args": { + "External id": 991056,"Record function id": 0, "Ev Idx": 9039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357610.193, "dur": 2.629, + "args": { + "External id": 991057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357610.830, "dur": 1.159, + "args": { + "External id": 991058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357611.151, "dur": 0.740, + "args": { + "External id": 991059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357617.285, "dur": 4.508, + "args": { + "External id": 991060,"Record function id": 0, "Ev Idx": 9043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941357618.496, "dur": 2.799, + "args": { + "External id": 991061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357619.067, "dur": 1.440, + "args": { + "External id": 991062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941357619.694, "dur": 0.666, + "args": { + "External id": 991063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941357626.731, "dur": 61283.538, + "args": { + "External id": 991064,"Record function id": 0, "Sequence number": 10552504, "Fwd thread id": 1, "Ev Idx": 9047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941357628.278, "dur": 61271.315, + "args": { + "External id": 991065,"Sequence number": 10552504, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9048 + } + }, + { + "ph": "f", "id": 415, "pid": 2338708, "tid": 2379421, "ts": 6345941357628.278, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2338708, "tid": 2379421, + "ts": 6345941357659.826, "dur": 44.423, + "args": { + "External id": 991066,"Record function id": 0, "Ev Idx": 9049 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2338708, "tid": 2379421, + "ts": 6345941357713.048, "dur": 76.565, + "args": { + "External id": 991067,"Record function id": 0, "Ev Idx": 9050 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2338708, "tid": 2379421, + "ts": 6345941357796.420, "dur": 61093.634, + "args": { + "External id": 991068,"Record function id": 0, "Ev Idx": 9051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941357897.862, "dur": 7.553, + "args": { + "External id": 991069,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941357915.801, "dur": 5.124, + "args": { + "External id": 991070,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941357937.625, "dur": 59961.429, + "args": { + "External id": 991071,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941357953.589, "dur": 59930.036, + "args": { + "External id": 991072,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941358143.605, "dur": 26.357, + "args": { + "External id": 991073,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941358198.888, "dur": 59638.111, + "args": { + "External id": 991074,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941358203.250, "dur": 59632.987, + "args": { + "External id": 991075,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941358208.836, "dur": 13.914, + "args": { + "External id": 991076,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941358225.402, "dur": 59606.793, + "args": { + "External id": 991077,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941418034.710, "dur": 15.707, + "args": { + "External id": 991078,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941418038.294, "dur": 11.258, + "args": { + "External id": 991079,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941418115.055, "dur": 345.528, + "args": { + "External id": 991080,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941418148.630, "dur": 305.875, + "args": { + "External id": 991081,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9064, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941418162.917, "dur": 284.716, + "args": { + "External id": 991082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941418483.328, "dur": 2.375, + "args": { + "External id": 991083,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9066, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941418555.732, "dur": 7.506, + "args": { + "External id": 991084,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941418577.460, "dur": 38.417, + "args": { + "External id": 991085,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941418627.770, "dur": 4.139, + "args": { + "External id": 991086,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941418638.730, "dur": 13.810, + "args": { + "External id": 991087,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941418659.290, "dur": 1.082, + "args": { + "External id": 991088,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941418665.853, "dur": 13.555, + "args": { + "External id": 991089,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941418685.062, "dur": 1.062, + "args": { + "External id": 991090,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941418690.848, "dur": 11.975, + "args": { + "External id": 991091,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941418708.395, "dur": 1.055, + "args": { + "External id": 991092,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941418714.696, "dur": 12.876, + "args": { + "External id": 991093,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941418732.294, "dur": 1.575, + "args": { + "External id": 991094,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941418739.161, "dur": 11.652, + "args": { + "External id": 991095,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941418755.780, "dur": 0.888, + "args": { + "External id": 991096,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941418760.937, "dur": 11.963, + "args": { + "External id": 991097,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941418778.471, "dur": 0.803, + "args": { + "External id": 991098,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941418786.396, "dur": 12.194, + "args": { + "External id": 991099,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941418803.631, "dur": 1.169, + "args": { + "External id": 991100,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941418809.508, "dur": 11.884, + "args": { + "External id": 991101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941418926.449, "dur": 3447.497, + "args": { + "External id": 991102,"Record function id": 0, "Ev Idx": 9085 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2338708, "tid": 2379421, + "ts": 6345941418949.845, "dur": 1299.580, + "args": { + "External id": 991103,"Record function id": 0, "Ev Idx": 9086 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338708, "tid": 2379421, + "ts": 6345941418965.518, "dur": 437.687, + "args": { + "External id": 991104,"Record function id": 0, "Ev Idx": 9087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941419125.269, "dur": 8.322, + "args": { + "External id": 991105,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941419138.079, "dur": 0.976, + "args": { + "External id": 991106,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941419141.702, "dur": 1.105, + "args": { + "External id": 991107,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941419144.859, "dur": 0.908, + "args": { + "External id": 991108,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941419147.625, "dur": 0.680, + "args": { + "External id": 991109,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941419152.543, "dur": 1.244, + "args": { + "External id": 991110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941419155.787, "dur": 0.921, + "args": { + "External id": 991111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941419158.825, "dur": 2.247, + "args": { + "External id": 991112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941419162.815, "dur": 3.187, + "args": { + "External id": 991113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941419170.200, "dur": 1.014, + "args": { + "External id": 991114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941419194.410, "dur": 170.117, + "args": { + "External id": 991115,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941419214.575, "dur": 144.007, + "args": { + "External id": 991116,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941419233.237, "dur": 16.255, + "args": { + "External id": 991117,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941419253.879, "dur": 75.059, + "args": { + "External id": 991118,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941419257.030, "dur": 71.459, + "args": { + "External id": 991119,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419261.614, "dur": 6.342, + "args": { + "External id": 991120,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941419269.981, "dur": 57.770, + "args": { + "External id": 991121,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9104 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2338708, "tid": 2379421, + "ts": 6345941419508.689, "dur": 730.563, + "args": { + "External id": 991122,"Record function id": 0, "Ev Idx": 9105 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338708, "tid": 2379421, + "ts": 6345941419528.887, "dur": 695.390, + "args": { + "External id": 991123,"Record function id": 0, "Ev Idx": 9106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941419603.387, "dur": 6.230, + "args": { + "External id": 991124,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941419627.070, "dur": 32.578, + "args": { + "External id": 991125,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419633.313, "dur": 1.613, + "args": { + "External id": 991126,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419636.870, "dur": 0.833, + "args": { + "External id": 991127,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419639.827, "dur": 0.706, + "args": { + "External id": 991128,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419642.330, "dur": 2.620, + "args": { + "External id": 991129,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419646.240, "dur": 0.437, + "args": { + "External id": 991130,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419648.552, "dur": 0.638, + "args": { + "External id": 991131,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419651.063, "dur": 0.550, + "args": { + "External id": 991132,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419652.839, "dur": 0.662, + "args": { + "External id": 991133,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419655.300, "dur": 0.396, + "args": { + "External id": 991134,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941419671.965, "dur": 47.336, + "args": { + "External id": 991135,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941419754.223, "dur": 139.923, + "args": { + "External id": 991136,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941419766.590, "dur": 3.415, + "args": { + "External id": 991137,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941419776.997, "dur": 11.446, + "args": { + "External id": 991138,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941419781.923, "dur": 6.052, + "args": { + "External id": 991139,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419786.063, "dur": 0.562, + "args": { + "External id": 991140,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941419796.207, "dur": 43.246, + "args": { + "External id": 991141,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419811.591, "dur": 3.080, + "args": { + "External id": 991142,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419817.037, "dur": 0.684, + "args": { + "External id": 991143,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419819.634, "dur": 0.893, + "args": { + "External id": 991144,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419821.871, "dur": 0.571, + "args": { + "External id": 991145,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419824.319, "dur": 0.550, + "args": { + "External id": 991146,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419826.679, "dur": 0.466, + "args": { + "External id": 991147,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419828.305, "dur": 0.476, + "args": { + "External id": 991148,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419830.315, "dur": 0.476, + "args": { + "External id": 991149,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941419832.441, "dur": 2.991, + "args": { + "External id": 991150,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941419851.077, "dur": 34.331, + "args": { + "External id": 991151,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941419943.669, "dur": 190.299, + "args": { + "External id": 991152,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941419973.162, "dur": 155.956, + "args": { + "External id": 991153,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9136, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941419984.155, "dur": 138.863, + "args": { + "External id": 991154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941420157.228, "dur": 2.579, + "args": { + "External id": 991155,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9138, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941420258.525, "dur": 2092.224, + "args": { + "External id": 991156,"Sequence number": 10552503, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9139 + } + }, + { + "ph": "f", "id": 416, "pid": 2338708, "tid": 2379421, "ts": 6345941420258.525, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941420391.197, "dur": 121.266, + "args": { + "External id": 991157,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941420561.598, "dur": 47.068, + "args": { + "External id": 991158,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941420632.110, "dur": 57.063, + "args": { + "External id": 991159,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941420701.437, "dur": 37.997, + "args": { + "External id": 991160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941420747.281, "dur": 37.363, + "args": { + "External id": 991161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941420792.565, "dur": 31.996, + "args": { + "External id": 991162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941420835.441, "dur": 33.487, + "args": { + "External id": 991163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941420900.677, "dur": 28.725, + "args": { + "External id": 991164,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941420953.638, "dur": 33.719, + "args": { + "External id": 991165,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941421035.068, "dur": 64.887, + "args": { + "External id": 991166,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941421123.011, "dur": 21.168, + "args": { + "External id": 991167,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941421157.684, "dur": 55.357, + "args": { + "External id": 991168,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941421217.951, "dur": 43.284, + "args": { + "External id": 991169,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941421297.038, "dur": 322.625, + "args": { + "External id": 991170,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941421394.911, "dur": 8.170, + "args": { + "External id": 991171,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941421406.226, "dur": 3.154, + "args": { + "External id": 991172,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941421410.893, "dur": 2.649, + "args": { + "External id": 991173,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941421415.019, "dur": 4.796, + "args": { + "External id": 991174,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941421483.331, "dur": 7.050, + "args": { + "External id": 991175,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941421485.668, "dur": 4.198, + "args": { + "External id": 991176,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941421492.604, "dur": 40.662, + "args": { + "External id": 991177,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941421499.811, "dur": 2.142, + "args": { + "External id": 991178,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941421535.239, "dur": 2.425, + "args": { + "External id": 991179,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941421536.762, "dur": 0.814, + "args": { + "External id": 991180,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941421538.818, "dur": 16.853, + "args": { + "External id": 991181,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941421540.891, "dur": 0.605, + "args": { + "External id": 991182,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941421666.669, "dur": 32.806, + "args": { + "External id": 991183,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941421719.281, "dur": 19.053, + "args": { + "External id": 991184,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941421749.099, "dur": 48.234, + "args": { + "External id": 991185,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941421806.512, "dur": 45.372, + "args": { + "External id": 991186,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941421861.468, "dur": 27.380, + "args": { + "External id": 991187,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941421898.950, "dur": 38.079, + "args": { + "External id": 991188,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941421946.151, "dur": 33.149, + "args": { + "External id": 991189,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941421996.819, "dur": 94.587, + "args": { + "External id": 991190,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941422119.675, "dur": 32.310, + "args": { + "External id": 991191,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941422171.543, "dur": 31.837, + "args": { + "External id": 991192,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941422221.002, "dur": 22.072, + "args": { + "External id": 991193,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941422261.105, "dur": 17.459, + "args": { + "External id": 991194,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941422294.642, "dur": 18.002, + "args": { + "External id": 991195,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422400.428, "dur": 18.971, + "args": { + "External id": 991196,"Record function id": 0, "Ev Idx": 9179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422404.065, "dur": 14.269, + "args": { + "External id": 991197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422410.212, "dur": 6.892, + "args": { + "External id": 991198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422412.269, "dur": 4.674, + "args": { + "External id": 991199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422423.765, "dur": 5.383, + "args": { + "External id": 991200,"Record function id": 0, "Ev Idx": 9183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422424.920, "dur": 3.677, + "args": { + "External id": 991201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422425.871, "dur": 2.034, + "args": { + "External id": 991202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422426.690, "dur": 1.064, + "args": { + "External id": 991203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422433.066, "dur": 4.829, + "args": { + "External id": 991204,"Record function id": 0, "Ev Idx": 9187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422434.508, "dur": 2.874, + "args": { + "External id": 991205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422435.103, "dur": 1.747, + "args": { + "External id": 991206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422435.774, "dur": 1.000, + "args": { + "External id": 991207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422441.579, "dur": 4.455, + "args": { + "External id": 991208,"Record function id": 0, "Ev Idx": 9191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422442.643, "dur": 2.863, + "args": { + "External id": 991209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422443.295, "dur": 1.614, + "args": { + "External id": 991210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422443.934, "dur": 0.897, + "args": { + "External id": 991211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422449.599, "dur": 4.348, + "args": { + "External id": 991212,"Record function id": 0, "Ev Idx": 9195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422451.212, "dur": 2.228, + "args": { + "External id": 991213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422451.791, "dur": 1.044, + "args": { + "External id": 991214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422452.140, "dur": 0.619, + "args": { + "External id": 991215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422457.732, "dur": 7.718, + "args": { + "External id": 991216,"Record function id": 0, "Ev Idx": 9199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422458.791, "dur": 6.148, + "args": { + "External id": 991217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422459.404, "dur": 4.881, + "args": { + "External id": 991218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422460.105, "dur": 4.064, + "args": { + "External id": 991219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422469.357, "dur": 4.340, + "args": { + "External id": 991220,"Record function id": 0, "Ev Idx": 9203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422470.572, "dur": 2.590, + "args": { + "External id": 991221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422471.280, "dur": 1.372, + "args": { + "External id": 991222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422471.677, "dur": 0.894, + "args": { + "External id": 991223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422477.826, "dur": 4.130, + "args": { + "External id": 991224,"Record function id": 0, "Ev Idx": 9207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422479.026, "dur": 2.412, + "args": { + "External id": 991225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422479.711, "dur": 1.109, + "args": { + "External id": 991226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422480.075, "dur": 0.662, + "args": { + "External id": 991227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422486.066, "dur": 4.471, + "args": { + "External id": 991228,"Record function id": 0, "Ev Idx": 9211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941422487.403, "dur": 2.618, + "args": { + "External id": 991229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422488.120, "dur": 1.155, + "args": { + "External id": 991230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941422488.609, "dur": 0.516, + "args": { + "External id": 991231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941422495.720, "dur": 60425.877, + "args": { + "External id": 991232,"Record function id": 0, "Sequence number": 10552502, "Fwd thread id": 1, "Ev Idx": 9215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941422513.116, "dur": 60396.952, + "args": { + "External id": 991233,"Sequence number": 10552502, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9216 + } + }, + { + "ph": "f", "id": 417, "pid": 2338708, "tid": 2379421, "ts": 6345941422513.116, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2338708, "tid": 2379421, + "ts": 6345941422548.892, "dur": 47.443, + "args": { + "External id": 991234,"Record function id": 0, "Ev Idx": 9217 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2338708, "tid": 2379421, + "ts": 6345941422605.632, "dur": 80.848, + "args": { + "External id": 991235,"Record function id": 0, "Ev Idx": 9218 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2338708, "tid": 2379421, + "ts": 6345941422693.528, "dur": 60205.709, + "args": { + "External id": 991236,"Record function id": 0, "Ev Idx": 9219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941422808.094, "dur": 8.180, + "args": { + "External id": 991237,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941422828.240, "dur": 5.062, + "args": { + "External id": 991238,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941422849.772, "dur": 58957.628, + "args": { + "External id": 991239,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941422866.797, "dur": 58925.538, + "args": { + "External id": 991240,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941422987.342, "dur": 39.932, + "args": { + "External id": 991241,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941423102.041, "dur": 58643.068, + "args": { + "External id": 991242,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941423106.448, "dur": 58637.540, + "args": { + "External id": 991243,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941423112.898, "dur": 12.798, + "args": { + "External id": 991244,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941423128.687, "dur": 58610.128, + "args": { + "External id": 991245,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941481935.131, "dur": 13.313, + "args": { + "External id": 991246,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941481939.048, "dur": 8.926, + "args": { + "External id": 991247,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941481984.881, "dur": 453.591, + "args": { + "External id": 991248,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941482032.867, "dur": 399.554, + "args": { + "External id": 991249,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9232, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941482047.245, "dur": 378.684, + "args": { + "External id": 991250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941482465.652, "dur": 2.537, + "args": { + "External id": 991251,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9234, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941482543.240, "dur": 7.770, + "args": { + "External id": 991252,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941482565.535, "dur": 39.769, + "args": { + "External id": 991253,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941482617.754, "dur": 4.355, + "args": { + "External id": 991254,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941482629.337, "dur": 31.468, + "args": { + "External id": 991255,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941482668.170, "dur": 1.121, + "args": { + "External id": 991256,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941482673.988, "dur": 12.861, + "args": { + "External id": 991257,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941482692.387, "dur": 1.349, + "args": { + "External id": 991258,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941482698.061, "dur": 12.302, + "args": { + "External id": 991259,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941482715.543, "dur": 1.087, + "args": { + "External id": 991260,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941482721.116, "dur": 12.473, + "args": { + "External id": 991261,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941482739.063, "dur": 1.135, + "args": { + "External id": 991262,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941482745.336, "dur": 11.037, + "args": { + "External id": 991263,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941482762.071, "dur": 1.340, + "args": { + "External id": 991264,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941482767.716, "dur": 11.950, + "args": { + "External id": 991265,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941482785.524, "dur": 1.102, + "args": { + "External id": 991266,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941482792.296, "dur": 11.004, + "args": { + "External id": 991267,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941482808.337, "dur": 1.186, + "args": { + "External id": 991268,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941482814.504, "dur": 11.850, + "args": { + "External id": 991269,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941482939.330, "dur": 3560.687, + "args": { + "External id": 991270,"Record function id": 0, "Ev Idx": 9253 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2338708, "tid": 2379421, + "ts": 6345941482965.583, "dur": 1391.418, + "args": { + "External id": 991271,"Record function id": 0, "Ev Idx": 9254 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338708, "tid": 2379421, + "ts": 6345941482984.318, "dur": 451.884, + "args": { + "External id": 991272,"Record function id": 0, "Ev Idx": 9255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941483154.818, "dur": 8.371, + "args": { + "External id": 991273,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941483167.422, "dur": 1.347, + "args": { + "External id": 991274,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941483171.430, "dur": 0.821, + "args": { + "External id": 991275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941483174.045, "dur": 1.084, + "args": { + "External id": 991276,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941483177.191, "dur": 1.175, + "args": { + "External id": 991277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941483180.093, "dur": 1.038, + "args": { + "External id": 991278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941483182.914, "dur": 1.004, + "args": { + "External id": 991279,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941483185.796, "dur": 2.760, + "args": { + "External id": 991280,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941483192.055, "dur": 2.993, + "args": { + "External id": 991281,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941483196.967, "dur": 0.694, + "args": { + "External id": 991282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941483219.631, "dur": 178.072, + "args": { + "External id": 991283,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941483239.685, "dur": 151.523, + "args": { + "External id": 991284,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941483259.702, "dur": 17.912, + "args": { + "External id": 991285,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941483282.462, "dur": 78.430, + "args": { + "External id": 991286,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941483285.711, "dur": 74.711, + "args": { + "External id": 991287,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483290.925, "dur": 6.964, + "args": { + "External id": 991288,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941483299.991, "dur": 59.598, + "args": { + "External id": 991289,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9272 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2338708, "tid": 2379421, + "ts": 6345941483545.636, "dur": 800.972, + "args": { + "External id": 991290,"Record function id": 0, "Ev Idx": 9273 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338708, "tid": 2379421, + "ts": 6345941483568.813, "dur": 761.776, + "args": { + "External id": 991291,"Record function id": 0, "Ev Idx": 9274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941483644.018, "dur": 6.172, + "args": { + "External id": 991292,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941483667.916, "dur": 32.827, + "args": { + "External id": 991293,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483673.775, "dur": 2.174, + "args": { + "External id": 991294,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483678.354, "dur": 0.584, + "args": { + "External id": 991295,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483680.166, "dur": 0.803, + "args": { + "External id": 991296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483683.153, "dur": 3.325, + "args": { + "External id": 991297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483688.379, "dur": 0.725, + "args": { + "External id": 991298,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483690.499, "dur": 0.565, + "args": { + "External id": 991299,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483692.911, "dur": 0.474, + "args": { + "External id": 991300,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483695.069, "dur": 0.490, + "args": { + "External id": 991301,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483696.595, "dur": 0.534, + "args": { + "External id": 991302,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941483713.455, "dur": 48.531, + "args": { + "External id": 991303,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941483797.521, "dur": 122.440, + "args": { + "External id": 991304,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941483808.917, "dur": 3.650, + "args": { + "External id": 991305,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941483819.208, "dur": 11.058, + "args": { + "External id": 991306,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941483823.896, "dur": 5.876, + "args": { + "External id": 991307,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483827.560, "dur": 0.705, + "args": { + "External id": 991308,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941483838.555, "dur": 28.239, + "args": { + "External id": 991309,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483840.728, "dur": 3.088, + "args": { + "External id": 991310,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483845.294, "dur": 0.678, + "args": { + "External id": 991311,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483847.938, "dur": 0.515, + "args": { + "External id": 991312,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483850.084, "dur": 0.489, + "args": { + "External id": 991313,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483851.660, "dur": 0.729, + "args": { + "External id": 991314,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483854.131, "dur": 0.458, + "args": { + "External id": 991315,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483856.113, "dur": 0.531, + "args": { + "External id": 991316,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483857.837, "dur": 0.529, + "args": { + "External id": 991317,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941483860.333, "dur": 2.618, + "args": { + "External id": 991318,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941483878.123, "dur": 33.173, + "args": { + "External id": 991319,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941483970.507, "dur": 263.057, + "args": { + "External id": 991320,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941483998.579, "dur": 229.459, + "args": { + "External id": 991321,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9304, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941484029.777, "dur": 192.261, + "args": { + "External id": 991322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941484257.977, "dur": 2.466, + "args": { + "External id": 991323,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9306, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941484367.034, "dur": 2110.185, + "args": { + "External id": 991324,"Sequence number": 10552501, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9307 + } + }, + { + "ph": "f", "id": 418, "pid": 2338708, "tid": 2379421, "ts": 6345941484367.034, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941484500.253, "dur": 120.527, + "args": { + "External id": 991325,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941484667.476, "dur": 46.309, + "args": { + "External id": 991326,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941484737.933, "dur": 56.616, + "args": { + "External id": 991327,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941484805.827, "dur": 36.569, + "args": { + "External id": 991328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941484849.636, "dur": 37.774, + "args": { + "External id": 991329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941484894.768, "dur": 31.565, + "args": { + "External id": 991330,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941484936.942, "dur": 36.061, + "args": { + "External id": 991331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941485001.237, "dur": 88.513, + "args": { + "External id": 991332,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941485120.437, "dur": 36.162, + "args": { + "External id": 991333,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941485187.990, "dur": 21.661, + "args": { + "External id": 991334,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941485226.476, "dur": 17.672, + "args": { + "External id": 991335,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941485257.968, "dur": 50.121, + "args": { + "External id": 991336,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941485312.858, "dur": 52.774, + "args": { + "External id": 991337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941485409.008, "dur": 293.543, + "args": { + "External id": 991338,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941485507.220, "dur": 7.465, + "args": { + "External id": 991339,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941485517.084, "dur": 3.395, + "args": { + "External id": 991340,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941485525.395, "dur": 2.715, + "args": { + "External id": 991341,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941485529.457, "dur": 4.715, + "args": { + "External id": 991342,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941485584.257, "dur": 5.690, + "args": { + "External id": 991343,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941485586.407, "dur": 3.326, + "args": { + "External id": 991344,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941485592.086, "dur": 37.430, + "args": { + "External id": 991345,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941485598.636, "dur": 2.289, + "args": { + "External id": 991346,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941485631.374, "dur": 1.975, + "args": { + "External id": 991347,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941485632.332, "dur": 0.924, + "args": { + "External id": 991348,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941485634.524, "dur": 18.595, + "args": { + "External id": 991349,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941485637.260, "dur": 0.445, + "args": { + "External id": 991350,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941485740.659, "dur": 35.146, + "args": { + "External id": 991351,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941485795.020, "dur": 20.580, + "args": { + "External id": 991352,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941485825.408, "dur": 48.086, + "args": { + "External id": 991353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941485882.732, "dur": 44.841, + "args": { + "External id": 991354,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941485949.191, "dur": 27.955, + "args": { + "External id": 991355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941485984.458, "dur": 60.929, + "args": { + "External id": 991356,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941486113.938, "dur": 49.437, + "args": { + "External id": 991357,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941486174.685, "dur": 36.228, + "args": { + "External id": 991358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941486238.669, "dur": 34.310, + "args": { + "External id": 991359,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941486295.949, "dur": 31.037, + "args": { + "External id": 991360,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941486346.130, "dur": 20.406, + "args": { + "External id": 991361,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941486385.307, "dur": 17.779, + "args": { + "External id": 991362,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941486421.984, "dur": 18.460, + "args": { + "External id": 991363,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486525.021, "dur": 17.405, + "args": { + "External id": 991364,"Record function id": 0, "Ev Idx": 9347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486528.821, "dur": 12.635, + "args": { + "External id": 991365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486533.627, "dur": 6.920, + "args": { + "External id": 991366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486535.821, "dur": 4.592, + "args": { + "External id": 991367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486546.835, "dur": 6.335, + "args": { + "External id": 991368,"Record function id": 0, "Ev Idx": 9351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486548.438, "dur": 4.110, + "args": { + "External id": 991369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486549.165, "dur": 2.785, + "args": { + "External id": 991370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486550.336, "dur": 1.489, + "args": { + "External id": 991371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486557.002, "dur": 4.958, + "args": { + "External id": 991372,"Record function id": 0, "Ev Idx": 9355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486558.292, "dur": 3.215, + "args": { + "External id": 991373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486559.120, "dur": 1.887, + "args": { + "External id": 991374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486559.695, "dur": 1.236, + "args": { + "External id": 991375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486565.818, "dur": 6.735, + "args": { + "External id": 991376,"Record function id": 0, "Ev Idx": 9359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486567.433, "dur": 4.594, + "args": { + "External id": 991377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486568.279, "dur": 3.154, + "args": { + "External id": 991378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486568.646, "dur": 2.706, + "args": { + "External id": 991379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486576.244, "dur": 4.297, + "args": { + "External id": 991380,"Record function id": 0, "Ev Idx": 9363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486577.471, "dur": 2.572, + "args": { + "External id": 991381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486578.083, "dur": 1.471, + "args": { + "External id": 991382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486578.566, "dur": 0.908, + "args": { + "External id": 991383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486584.379, "dur": 4.142, + "args": { + "External id": 991384,"Record function id": 0, "Ev Idx": 9367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486585.448, "dur": 2.598, + "args": { + "External id": 991385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486586.092, "dur": 1.416, + "args": { + "External id": 991386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486586.637, "dur": 0.749, + "args": { + "External id": 991387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486592.408, "dur": 4.278, + "args": { + "External id": 991388,"Record function id": 0, "Ev Idx": 9371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486594.161, "dur": 2.088, + "args": { + "External id": 991389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486594.702, "dur": 1.080, + "args": { + "External id": 991390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486595.055, "dur": 0.634, + "args": { + "External id": 991391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486600.335, "dur": 4.481, + "args": { + "External id": 991392,"Record function id": 0, "Ev Idx": 9375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486601.673, "dur": 2.660, + "args": { + "External id": 991393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486602.363, "dur": 1.481, + "args": { + "External id": 991394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486602.892, "dur": 0.846, + "args": { + "External id": 991395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486608.930, "dur": 4.863, + "args": { + "External id": 991396,"Record function id": 0, "Ev Idx": 9379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941486610.312, "dur": 2.983, + "args": { + "External id": 991397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486611.023, "dur": 1.803, + "args": { + "External id": 991398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941486611.926, "dur": 0.762, + "args": { + "External id": 991399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941486618.916, "dur": 60218.577, + "args": { + "External id": 991400,"Record function id": 0, "Sequence number": 10552500, "Fwd thread id": 1, "Ev Idx": 9383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941486620.564, "dur": 60205.978, + "args": { + "External id": 991401,"Sequence number": 10552500, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9384 + } + }, + { + "ph": "f", "id": 419, "pid": 2338708, "tid": 2379421, "ts": 6345941486620.564, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2338708, "tid": 2379421, + "ts": 6345941486655.668, "dur": 44.288, + "args": { + "External id": 991402,"Record function id": 0, "Ev Idx": 9385 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2338708, "tid": 2379421, + "ts": 6345941486708.394, "dur": 87.312, + "args": { + "External id": 991403,"Record function id": 0, "Ev Idx": 9386 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2338708, "tid": 2379421, + "ts": 6345941486803.276, "dur": 60014.618, + "args": { + "External id": 991404,"Record function id": 0, "Ev Idx": 9387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941486911.006, "dur": 7.815, + "args": { + "External id": 991405,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941486930.175, "dur": 5.302, + "args": { + "External id": 991406,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941486952.509, "dur": 58702.321, + "args": { + "External id": 991407,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941486968.792, "dur": 58670.485, + "args": { + "External id": 991408,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941487165.595, "dur": 25.454, + "args": { + "External id": 991409,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941487219.545, "dur": 58373.428, + "args": { + "External id": 991410,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941487223.177, "dur": 58368.576, + "args": { + "External id": 991411,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941487228.497, "dur": 16.275, + "args": { + "External id": 991412,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941487247.024, "dur": 58339.677, + "args": { + "External id": 991413,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941545781.163, "dur": 12.337, + "args": { + "External id": 991414,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941545784.913, "dur": 8.111, + "args": { + "External id": 991415,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941545829.519, "dur": 534.078, + "args": { + "External id": 991416,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941545867.412, "dur": 489.367, + "args": { + "External id": 991417,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9400, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941545879.677, "dur": 468.354, + "args": { + "External id": 991418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941546391.822, "dur": 3.076, + "args": { + "External id": 991419,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9402, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941546473.445, "dur": 7.904, + "args": { + "External id": 991420,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941546495.548, "dur": 39.090, + "args": { + "External id": 991421,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941546546.962, "dur": 1.933, + "args": { + "External id": 991422,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941546555.652, "dur": 14.961, + "args": { + "External id": 991423,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941546577.837, "dur": 1.332, + "args": { + "External id": 991424,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941546584.331, "dur": 12.964, + "args": { + "External id": 991425,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941546602.871, "dur": 1.041, + "args": { + "External id": 991426,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941546609.183, "dur": 12.025, + "args": { + "External id": 991427,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941546626.712, "dur": 1.250, + "args": { + "External id": 991428,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941546632.919, "dur": 14.788, + "args": { + "External id": 991429,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941546652.694, "dur": 1.702, + "args": { + "External id": 991430,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941546658.974, "dur": 12.774, + "args": { + "External id": 991431,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941546678.957, "dur": 3.362, + "args": { + "External id": 991432,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941546687.186, "dur": 14.050, + "args": { + "External id": 991433,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941546706.359, "dur": 0.771, + "args": { + "External id": 991434,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941546712.357, "dur": 12.406, + "args": { + "External id": 991435,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941546729.643, "dur": 0.979, + "args": { + "External id": 991436,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941546735.485, "dur": 13.793, + "args": { + "External id": 991437,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941546854.541, "dur": 3393.555, + "args": { + "External id": 991438,"Record function id": 0, "Ev Idx": 9421 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2338708, "tid": 2379421, + "ts": 6345941546879.408, "dur": 1276.406, + "args": { + "External id": 991439,"Record function id": 0, "Ev Idx": 9422 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338708, "tid": 2379421, + "ts": 6345941546896.552, "dur": 431.980, + "args": { + "External id": 991440,"Record function id": 0, "Ev Idx": 9423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941546994.214, "dur": 4.646, + "args": { + "External id": 991441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941547002.487, "dur": 1.271, + "args": { + "External id": 991442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941547006.253, "dur": 0.840, + "args": { + "External id": 991443,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941547030.353, "dur": 1.608, + "args": { + "External id": 991444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941547034.014, "dur": 1.304, + "args": { + "External id": 991445,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941547037.120, "dur": 3.250, + "args": { + "External id": 991446,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941547042.284, "dur": 0.969, + "args": { + "External id": 991447,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941547047.251, "dur": 2.359, + "args": { + "External id": 991448,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941547051.334, "dur": 35.342, + "args": { + "External id": 991449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941547091.720, "dur": 1.354, + "args": { + "External id": 991450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941547115.453, "dur": 174.429, + "args": { + "External id": 991451,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941547135.652, "dur": 148.390, + "args": { + "External id": 991452,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941547155.680, "dur": 17.334, + "args": { + "External id": 991453,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941547177.370, "dur": 76.950, + "args": { + "External id": 991454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941547180.674, "dur": 73.254, + "args": { + "External id": 991455,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547185.928, "dur": 7.010, + "args": { + "External id": 991456,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941547195.103, "dur": 58.149, + "args": { + "External id": 991457,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9440 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2338708, "tid": 2379421, + "ts": 6345941547434.994, "dur": 711.568, + "args": { + "External id": 991458,"Record function id": 0, "Ev Idx": 9441 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338708, "tid": 2379421, + "ts": 6345941547453.758, "dur": 677.178, + "args": { + "External id": 991459,"Record function id": 0, "Ev Idx": 9442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941547524.977, "dur": 6.671, + "args": { + "External id": 991460,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941547549.091, "dur": 32.665, + "args": { + "External id": 991461,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547554.719, "dur": 4.037, + "args": { + "External id": 991462,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547561.141, "dur": 0.540, + "args": { + "External id": 991463,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547563.722, "dur": 0.435, + "args": { + "External id": 991464,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547565.375, "dur": 0.673, + "args": { + "External id": 991465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547567.768, "dur": 0.502, + "args": { + "External id": 991466,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547570.082, "dur": 0.352, + "args": { + "External id": 991467,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547571.397, "dur": 0.547, + "args": { + "External id": 991468,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547573.585, "dur": 0.370, + "args": { + "External id": 991469,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547575.490, "dur": 2.597, + "args": { + "External id": 991470,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941547593.689, "dur": 48.419, + "args": { + "External id": 991471,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941547678.132, "dur": 128.127, + "args": { + "External id": 991472,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941547689.614, "dur": 3.400, + "args": { + "External id": 991473,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941547699.577, "dur": 11.405, + "args": { + "External id": 991474,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941547704.722, "dur": 5.791, + "args": { + "External id": 991475,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547708.413, "dur": 0.855, + "args": { + "External id": 991476,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941547718.612, "dur": 32.545, + "args": { + "External id": 991477,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547720.415, "dur": 0.900, + "args": { + "External id": 991478,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547723.356, "dur": 0.489, + "args": { + "External id": 991479,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547725.846, "dur": 0.590, + "args": { + "External id": 991480,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547727.617, "dur": 0.555, + "args": { + "External id": 991481,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547729.978, "dur": 0.414, + "args": { + "External id": 991482,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547737.951, "dur": 2.708, + "args": { + "External id": 991483,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547742.345, "dur": 0.425, + "args": { + "External id": 991484,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547744.480, "dur": 0.421, + "args": { + "External id": 991485,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941547746.727, "dur": 0.433, + "args": { + "External id": 991486,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941547762.607, "dur": 35.221, + "args": { + "External id": 991487,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941547855.443, "dur": 129.963, + "args": { + "External id": 991488,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941547884.574, "dur": 96.884, + "args": { + "External id": 991489,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9472, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941547894.856, "dur": 81.825, + "args": { + "External id": 991490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941548003.238, "dur": 2.495, + "args": { + "External id": 991491,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9474, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941548164.797, "dur": 2060.366, + "args": { + "External id": 991492,"Sequence number": 10552499, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9475 + } + }, + { + "ph": "f", "id": 420, "pid": 2338708, "tid": 2379421, "ts": 6345941548164.797, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941548293.370, "dur": 127.652, + "args": { + "External id": 991493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941548470.625, "dur": 46.113, + "args": { + "External id": 991494,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941548535.898, "dur": 53.790, + "args": { + "External id": 991495,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941548601.482, "dur": 35.478, + "args": { + "External id": 991496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941548644.685, "dur": 35.583, + "args": { + "External id": 991497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941548690.433, "dur": 32.263, + "args": { + "External id": 991498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941548730.503, "dur": 32.935, + "args": { + "External id": 991499,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941548792.027, "dur": 27.129, + "args": { + "External id": 991500,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941548841.252, "dur": 33.844, + "args": { + "External id": 991501,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941548900.381, "dur": 22.402, + "args": { + "External id": 991502,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941548940.904, "dur": 17.627, + "args": { + "External id": 991503,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941548968.696, "dur": 62.804, + "args": { + "External id": 991504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941549038.699, "dur": 84.507, + "args": { + "External id": 991505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941549163.478, "dur": 319.429, + "args": { + "External id": 991506,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941549275.393, "dur": 10.673, + "args": { + "External id": 991507,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941549289.253, "dur": 3.327, + "args": { + "External id": 991508,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941549294.216, "dur": 2.350, + "args": { + "External id": 991509,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941549298.054, "dur": 2.393, + "args": { + "External id": 991510,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941549359.303, "dur": 5.701, + "args": { + "External id": 991511,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941549361.400, "dur": 3.345, + "args": { + "External id": 991512,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941549367.172, "dur": 36.780, + "args": { + "External id": 991513,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941549374.005, "dur": 1.915, + "args": { + "External id": 991514,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941549405.774, "dur": 2.038, + "args": { + "External id": 991515,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941549407.020, "dur": 0.659, + "args": { + "External id": 991516,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941549409.014, "dur": 20.267, + "args": { + "External id": 991517,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941549411.234, "dur": 4.158, + "args": { + "External id": 991518,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941549526.286, "dur": 35.002, + "args": { + "External id": 991519,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941549590.054, "dur": 21.117, + "args": { + "External id": 991520,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941549621.541, "dur": 53.401, + "args": { + "External id": 991521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941549683.362, "dur": 47.878, + "args": { + "External id": 991522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941549744.212, "dur": 26.703, + "args": { + "External id": 991523,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941549778.508, "dur": 37.974, + "args": { + "External id": 991524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941549825.940, "dur": 34.562, + "args": { + "External id": 991525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941549869.362, "dur": 37.086, + "args": { + "External id": 991526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941549927.182, "dur": 30.116, + "args": { + "External id": 991527,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941549976.526, "dur": 50.318, + "args": { + "External id": 991528,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941550048.954, "dur": 61.622, + "args": { + "External id": 991529,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941550134.637, "dur": 17.623, + "args": { + "External id": 991530,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941550167.747, "dur": 18.736, + "args": { + "External id": 991531,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550274.751, "dur": 18.302, + "args": { + "External id": 991532,"Record function id": 0, "Ev Idx": 9515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550278.890, "dur": 13.100, + "args": { + "External id": 991533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550283.928, "dur": 6.814, + "args": { + "External id": 991534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550285.998, "dur": 4.586, + "args": { + "External id": 991535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550297.766, "dur": 5.862, + "args": { + "External id": 991536,"Record function id": 0, "Ev Idx": 9519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550299.562, "dur": 3.541, + "args": { + "External id": 991537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550300.330, "dur": 2.204, + "args": { + "External id": 991538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550301.149, "dur": 1.243, + "args": { + "External id": 991539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550307.644, "dur": 5.586, + "args": { + "External id": 991540,"Record function id": 0, "Ev Idx": 9523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550309.441, "dur": 3.249, + "args": { + "External id": 991541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550310.054, "dur": 1.976, + "args": { + "External id": 991542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550310.434, "dur": 1.509, + "args": { + "External id": 991543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550317.163, "dur": 4.398, + "args": { + "External id": 991544,"Record function id": 0, "Ev Idx": 9527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550318.521, "dur": 2.550, + "args": { + "External id": 991545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550319.293, "dur": 1.178, + "args": { + "External id": 991546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550319.665, "dur": 0.696, + "args": { + "External id": 991547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550325.336, "dur": 6.741, + "args": { + "External id": 991548,"Record function id": 0, "Ev Idx": 9531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550326.701, "dur": 4.901, + "args": { + "External id": 991549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550327.329, "dur": 3.662, + "args": { + "External id": 991550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550327.729, "dur": 3.181, + "args": { + "External id": 991551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550336.104, "dur": 5.472, + "args": { + "External id": 991552,"Record function id": 0, "Ev Idx": 9535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550337.773, "dur": 3.271, + "args": { + "External id": 991553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550338.434, "dur": 1.965, + "args": { + "External id": 991554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550339.228, "dur": 1.032, + "args": { + "External id": 991555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550345.606, "dur": 4.827, + "args": { + "External id": 991556,"Record function id": 0, "Ev Idx": 9539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550347.098, "dur": 2.788, + "args": { + "External id": 991557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550347.830, "dur": 1.493, + "args": { + "External id": 991558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550348.410, "dur": 0.835, + "args": { + "External id": 991559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550354.423, "dur": 4.767, + "args": { + "External id": 991560,"Record function id": 0, "Ev Idx": 9543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550355.936, "dur": 2.726, + "args": { + "External id": 991561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550356.757, "dur": 1.301, + "args": { + "External id": 991562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550357.310, "dur": 0.637, + "args": { + "External id": 991563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550363.790, "dur": 5.067, + "args": { + "External id": 991564,"Record function id": 0, "Ev Idx": 9547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941550365.199, "dur": 3.148, + "args": { + "External id": 991565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550365.793, "dur": 1.707, + "args": { + "External id": 991566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941550366.469, "dur": 0.886, + "args": { + "External id": 991567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941550374.170, "dur": 60591.310, + "args": { + "External id": 991568,"Record function id": 0, "Sequence number": 10552498, "Fwd thread id": 1, "Ev Idx": 9551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941550376.263, "dur": 60578.582, + "args": { + "External id": 991569,"Sequence number": 10552498, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9552 + } + }, + { + "ph": "f", "id": 421, "pid": 2338708, "tid": 2379421, "ts": 6345941550376.263, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2338708, "tid": 2379421, + "ts": 6345941550411.455, "dur": 47.650, + "args": { + "External id": 991570,"Record function id": 0, "Ev Idx": 9553 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2338708, "tid": 2379421, + "ts": 6345941550468.193, "dur": 79.086, + "args": { + "External id": 991571,"Record function id": 0, "Ev Idx": 9554 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2338708, "tid": 2379421, + "ts": 6345941550553.988, "dur": 60390.725, + "args": { + "External id": 991572,"Record function id": 0, "Ev Idx": 9555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941550660.903, "dur": 8.208, + "args": { + "External id": 991573,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941550680.778, "dur": 5.606, + "args": { + "External id": 991574,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941550703.590, "dur": 59154.778, + "args": { + "External id": 991575,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941550719.871, "dur": 59122.778, + "args": { + "External id": 991576,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941550833.457, "dur": 20.464, + "args": { + "External id": 991577,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941550878.046, "dur": 58912.346, + "args": { + "External id": 991578,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941550882.455, "dur": 58906.743, + "args": { + "External id": 991579,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941550888.304, "dur": 8.623, + "args": { + "External id": 991580,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941550901.693, "dur": 58881.728, + "args": { + "External id": 991581,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941609984.694, "dur": 14.483, + "args": { + "External id": 991582,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941609988.635, "dur": 9.987, + "args": { + "External id": 991583,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941610050.007, "dur": 441.379, + "args": { + "External id": 991584,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941610114.593, "dur": 370.263, + "args": { + "External id": 991585,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9568, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941610130.704, "dur": 347.402, + "args": { + "External id": 991586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941610515.325, "dur": 2.796, + "args": { + "External id": 991587,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9570, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941610592.429, "dur": 10.620, + "args": { + "External id": 991588,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941610617.598, "dur": 42.277, + "args": { + "External id": 991589,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941610671.773, "dur": 2.132, + "args": { + "External id": 991590,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941610681.013, "dur": 14.988, + "args": { + "External id": 991591,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941610702.634, "dur": 1.305, + "args": { + "External id": 991592,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941610708.981, "dur": 15.406, + "args": { + "External id": 991593,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941610729.594, "dur": 0.994, + "args": { + "External id": 991594,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941610735.392, "dur": 12.786, + "args": { + "External id": 991595,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941610753.344, "dur": 1.246, + "args": { + "External id": 991596,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941610759.395, "dur": 13.509, + "args": { + "External id": 991597,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941610777.327, "dur": 1.466, + "args": { + "External id": 991598,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941610783.161, "dur": 13.667, + "args": { + "External id": 991599,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941610801.815, "dur": 0.938, + "args": { + "External id": 991600,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941610808.116, "dur": 13.155, + "args": { + "External id": 991601,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941610828.689, "dur": 1.093, + "args": { + "External id": 991602,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941610834.775, "dur": 13.047, + "args": { + "External id": 991603,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941610852.542, "dur": 3.800, + "args": { + "External id": 991604,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941610860.992, "dur": 14.122, + "args": { + "External id": 991605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941610982.139, "dur": 3455.398, + "args": { + "External id": 991606,"Record function id": 0, "Ev Idx": 9589 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2338708, "tid": 2379421, + "ts": 6345941611026.255, "dur": 1277.851, + "args": { + "External id": 991607,"Record function id": 0, "Ev Idx": 9590 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338708, "tid": 2379421, + "ts": 6345941611048.045, "dur": 424.613, + "args": { + "External id": 991608,"Record function id": 0, "Ev Idx": 9591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941611193.497, "dur": 6.628, + "args": { + "External id": 991609,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941611204.014, "dur": 1.128, + "args": { + "External id": 991610,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941611207.455, "dur": 0.988, + "args": { + "External id": 991611,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941611210.238, "dur": 1.099, + "args": { + "External id": 991612,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941611212.934, "dur": 1.362, + "args": { + "External id": 991613,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941611218.654, "dur": 1.265, + "args": { + "External id": 991614,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941611221.747, "dur": 1.167, + "args": { + "External id": 991615,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941611224.443, "dur": 4.587, + "args": { + "External id": 991616,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941611230.851, "dur": 0.844, + "args": { + "External id": 991617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941611235.143, "dur": 0.831, + "args": { + "External id": 991618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941611258.351, "dur": 174.466, + "args": { + "External id": 991619,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941611279.092, "dur": 148.528, + "args": { + "External id": 991620,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941611298.886, "dur": 16.185, + "args": { + "External id": 991621,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941611319.443, "dur": 76.783, + "args": { + "External id": 991622,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941611322.537, "dur": 73.261, + "args": { + "External id": 991623,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611327.756, "dur": 6.138, + "args": { + "External id": 991624,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941611336.277, "dur": 58.733, + "args": { + "External id": 991625,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9608 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2338708, "tid": 2379421, + "ts": 6345941611581.782, "dur": 713.260, + "args": { + "External id": 991626,"Record function id": 0, "Ev Idx": 9609 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338708, "tid": 2379421, + "ts": 6345941611603.578, "dur": 677.009, + "args": { + "External id": 991627,"Record function id": 0, "Ev Idx": 9610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941611671.098, "dur": 7.353, + "args": { + "External id": 991628,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941611697.342, "dur": 34.585, + "args": { + "External id": 991629,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611704.557, "dur": 2.204, + "args": { + "External id": 991630,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611710.091, "dur": 0.660, + "args": { + "External id": 991631,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611712.634, "dur": 2.917, + "args": { + "External id": 991632,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611716.609, "dur": 0.712, + "args": { + "External id": 991633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611719.071, "dur": 0.585, + "args": { + "External id": 991634,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611721.551, "dur": 0.370, + "args": { + "External id": 991635,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611722.937, "dur": 0.515, + "args": { + "External id": 991636,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611725.273, "dur": 0.413, + "args": { + "External id": 991637,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611727.569, "dur": 0.369, + "args": { + "External id": 991638,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941611744.259, "dur": 48.990, + "args": { + "External id": 991639,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941611829.402, "dur": 120.797, + "args": { + "External id": 991640,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941611840.615, "dur": 3.919, + "args": { + "External id": 991641,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941611851.186, "dur": 13.691, + "args": { + "External id": 991642,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941611855.943, "dur": 8.458, + "args": { + "External id": 991643,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611859.830, "dur": 3.354, + "args": { + "External id": 991644,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941611872.364, "dur": 25.529, + "args": { + "External id": 991645,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611874.413, "dur": 0.699, + "args": { + "External id": 991646,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611877.159, "dur": 0.468, + "args": { + "External id": 991647,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611878.851, "dur": 0.746, + "args": { + "External id": 991648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611881.441, "dur": 0.706, + "args": { + "External id": 991649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611883.724, "dur": 0.641, + "args": { + "External id": 991650,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611885.523, "dur": 0.617, + "args": { + "External id": 991651,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611887.504, "dur": 0.474, + "args": { + "External id": 991652,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611889.971, "dur": 2.677, + "args": { + "External id": 991653,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941611893.658, "dur": 0.401, + "args": { + "External id": 991654,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941611909.205, "dur": 32.796, + "args": { + "External id": 991655,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941611998.053, "dur": 196.253, + "args": { + "External id": 991656,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941612044.913, "dur": 144.992, + "args": { + "External id": 991657,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9640, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941612094.627, "dur": 90.413, + "args": { + "External id": 991658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941612213.502, "dur": 2.006, + "args": { + "External id": 991659,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9642, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941612312.303, "dur": 2100.934, + "args": { + "External id": 991660,"Sequence number": 10552497, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9643 + } + }, + { + "ph": "f", "id": 422, "pid": 2338708, "tid": 2379421, "ts": 6345941612312.303, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941612443.608, "dur": 122.970, + "args": { + "External id": 991661,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941612614.036, "dur": 47.681, + "args": { + "External id": 991662,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941612685.695, "dur": 58.802, + "args": { + "External id": 991663,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941612755.862, "dur": 37.785, + "args": { + "External id": 991664,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941612801.281, "dur": 37.266, + "args": { + "External id": 991665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941612846.238, "dur": 34.157, + "args": { + "External id": 991666,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941612890.036, "dur": 33.459, + "args": { + "External id": 991667,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941612953.247, "dur": 28.672, + "args": { + "External id": 991668,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941613003.867, "dur": 98.035, + "args": { + "External id": 991669,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941613136.195, "dur": 26.546, + "args": { + "External id": 991670,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941613180.509, "dur": 18.585, + "args": { + "External id": 991671,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941613213.042, "dur": 51.874, + "args": { + "External id": 991672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941613269.949, "dur": 39.215, + "args": { + "External id": 991673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941613343.001, "dur": 323.670, + "args": { + "External id": 991674,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941613449.546, "dur": 9.924, + "args": { + "External id": 991675,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941613462.151, "dur": 3.303, + "args": { + "External id": 991676,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941613466.861, "dur": 5.727, + "args": { + "External id": 991677,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941613473.949, "dur": 2.452, + "args": { + "External id": 991678,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941613534.609, "dur": 5.805, + "args": { + "External id": 991679,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941613536.676, "dur": 3.514, + "args": { + "External id": 991680,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941613542.375, "dur": 41.935, + "args": { + "External id": 991681,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941613549.234, "dur": 2.027, + "args": { + "External id": 991682,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941613586.239, "dur": 1.718, + "args": { + "External id": 991683,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941613587.165, "dur": 0.705, + "args": { + "External id": 991684,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941613588.974, "dur": 18.657, + "args": { + "External id": 991685,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941613591.553, "dur": 1.048, + "args": { + "External id": 991686,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941613717.207, "dur": 32.915, + "args": { + "External id": 991687,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941613771.851, "dur": 21.046, + "args": { + "External id": 991688,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941613803.524, "dur": 49.080, + "args": { + "External id": 991689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941613861.231, "dur": 44.496, + "args": { + "External id": 991690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941613915.455, "dur": 26.406, + "args": { + "External id": 991691,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941613951.291, "dur": 36.690, + "args": { + "External id": 991692,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941613996.650, "dur": 92.417, + "args": { + "External id": 991693,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941614106.405, "dur": 41.360, + "args": { + "External id": 991694,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941614170.843, "dur": 29.452, + "args": { + "External id": 991695,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941614218.143, "dur": 28.328, + "args": { + "External id": 991696,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941614263.243, "dur": 21.132, + "args": { + "External id": 991697,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941614315.362, "dur": 18.890, + "args": { + "External id": 991698,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941614349.669, "dur": 24.717, + "args": { + "External id": 991699,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614463.520, "dur": 17.400, + "args": { + "External id": 991700,"Record function id": 0, "Ev Idx": 9683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614466.884, "dur": 12.888, + "args": { + "External id": 991701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614471.667, "dur": 7.067, + "args": { + "External id": 991702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614473.717, "dur": 4.828, + "args": { + "External id": 991703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614485.637, "dur": 5.713, + "args": { + "External id": 991704,"Record function id": 0, "Ev Idx": 9687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614487.038, "dur": 3.762, + "args": { + "External id": 991705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614487.920, "dur": 2.303, + "args": { + "External id": 991706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614489.022, "dur": 1.105, + "args": { + "External id": 991707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614495.144, "dur": 5.573, + "args": { + "External id": 991708,"Record function id": 0, "Ev Idx": 9691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614496.687, "dur": 3.515, + "args": { + "External id": 991709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614497.611, "dur": 1.924, + "args": { + "External id": 991710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614498.296, "dur": 1.156, + "args": { + "External id": 991711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614504.434, "dur": 5.542, + "args": { + "External id": 991712,"Record function id": 0, "Ev Idx": 9695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614506.276, "dur": 3.209, + "args": { + "External id": 991713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614507.284, "dur": 1.684, + "args": { + "External id": 991714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614507.877, "dur": 1.017, + "args": { + "External id": 991715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614513.753, "dur": 4.274, + "args": { + "External id": 991716,"Record function id": 0, "Ev Idx": 9699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614514.926, "dur": 2.607, + "args": { + "External id": 991717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614515.564, "dur": 1.363, + "args": { + "External id": 991718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614516.106, "dur": 0.733, + "args": { + "External id": 991719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614521.962, "dur": 7.418, + "args": { + "External id": 991720,"Record function id": 0, "Ev Idx": 9703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614523.150, "dur": 5.719, + "args": { + "External id": 991721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614524.104, "dur": 4.228, + "args": { + "External id": 991722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614524.757, "dur": 3.438, + "args": { + "External id": 991723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614533.256, "dur": 4.836, + "args": { + "External id": 991724,"Record function id": 0, "Ev Idx": 9707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614534.711, "dur": 2.891, + "args": { + "External id": 991725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614535.463, "dur": 1.309, + "args": { + "External id": 991726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614535.821, "dur": 0.829, + "args": { + "External id": 991727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614542.167, "dur": 4.650, + "args": { + "External id": 991728,"Record function id": 0, "Ev Idx": 9711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614543.497, "dur": 2.832, + "args": { + "External id": 991729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614544.204, "dur": 1.342, + "args": { + "External id": 991730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614544.686, "dur": 0.756, + "args": { + "External id": 991731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614550.896, "dur": 4.902, + "args": { + "External id": 991732,"Record function id": 0, "Ev Idx": 9715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941614552.289, "dur": 2.996, + "args": { + "External id": 991733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614552.904, "dur": 1.643, + "args": { + "External id": 991734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941614553.402, "dur": 1.005, + "args": { + "External id": 991735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941614561.313, "dur": 62756.829, + "args": { + "External id": 991736,"Record function id": 0, "Sequence number": 10552496, "Fwd thread id": 1, "Ev Idx": 9719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941614563.022, "dur": 62743.728, + "args": { + "External id": 991737,"Sequence number": 10552496, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9720 + } + }, + { + "ph": "f", "id": 423, "pid": 2338708, "tid": 2379421, "ts": 6345941614563.022, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2338708, "tid": 2379421, + "ts": 6345941614597.246, "dur": 46.397, + "args": { + "External id": 991738,"Record function id": 0, "Ev Idx": 9721 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2338708, "tid": 2379421, + "ts": 6345941614652.749, "dur": 79.091, + "args": { + "External id": 991739,"Record function id": 0, "Ev Idx": 9722 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2338708, "tid": 2379421, + "ts": 6345941614739.002, "dur": 62557.164, + "args": { + "External id": 991740,"Record function id": 0, "Ev Idx": 9723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941614844.327, "dur": 7.845, + "args": { + "External id": 991741,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941614862.608, "dur": 5.381, + "args": { + "External id": 991742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941614884.392, "dur": 61301.574, + "args": { + "External id": 991743,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941614901.716, "dur": 61267.532, + "args": { + "External id": 991744,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941615102.608, "dur": 28.920, + "args": { + "External id": 991745,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941615168.616, "dur": 60955.935, + "args": { + "External id": 991746,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941615172.888, "dur": 60950.850, + "args": { + "External id": 991747,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941615186.748, "dur": 19.448, + "args": { + "External id": 991748,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941615208.417, "dur": 60913.203, + "args": { + "External id": 991749,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941676313.960, "dur": 13.057, + "args": { + "External id": 991750,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941676318.089, "dur": 8.444, + "args": { + "External id": 991751,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941676363.882, "dur": 416.375, + "args": { + "External id": 991752,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941676399.541, "dur": 374.681, + "args": { + "External id": 991753,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9736, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941676415.026, "dur": 352.542, + "args": { + "External id": 991754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941676804.222, "dur": 2.575, + "args": { + "External id": 991755,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9738, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941676877.006, "dur": 7.506, + "args": { + "External id": 991756,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941676898.913, "dur": 39.881, + "args": { + "External id": 991757,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941676952.273, "dur": 4.049, + "args": { + "External id": 991758,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941676962.788, "dur": 13.814, + "args": { + "External id": 991759,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941676983.179, "dur": 1.301, + "args": { + "External id": 991760,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941676989.323, "dur": 12.768, + "args": { + "External id": 991761,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941677028.521, "dur": 1.945, + "args": { + "External id": 991762,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941677037.546, "dur": 51.189, + "args": { + "External id": 991763,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941677099.516, "dur": 1.944, + "args": { + "External id": 991764,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941677107.776, "dur": 14.761, + "args": { + "External id": 991765,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941677128.191, "dur": 1.382, + "args": { + "External id": 991766,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941677135.256, "dur": 12.519, + "args": { + "External id": 991767,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941677153.367, "dur": 1.186, + "args": { + "External id": 991768,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941677159.497, "dur": 14.737, + "args": { + "External id": 991769,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941677179.288, "dur": 0.878, + "args": { + "External id": 991770,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941677186.953, "dur": 13.092, + "args": { + "External id": 991771,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941677205.439, "dur": 0.987, + "args": { + "External id": 991772,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941677211.325, "dur": 12.965, + "args": { + "External id": 991773,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941677336.853, "dur": 3412.969, + "args": { + "External id": 991774,"Record function id": 0, "Ev Idx": 9757 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2338708, "tid": 2379421, + "ts": 6345941677362.077, "dur": 1246.008, + "args": { + "External id": 991775,"Record function id": 0, "Ev Idx": 9758 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338708, "tid": 2379421, + "ts": 6345941677381.173, "dur": 379.147, + "args": { + "External id": 991776,"Record function id": 0, "Ev Idx": 9759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941677483.481, "dur": 7.434, + "args": { + "External id": 991777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941677494.794, "dur": 1.618, + "args": { + "External id": 991778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941677499.116, "dur": 1.211, + "args": { + "External id": 991779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941677502.364, "dur": 1.085, + "args": { + "External id": 991780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941677505.717, "dur": 1.072, + "args": { + "External id": 991781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941677508.736, "dur": 0.896, + "args": { + "External id": 991782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941677511.587, "dur": 1.027, + "args": { + "External id": 991783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941677516.814, "dur": 2.352, + "args": { + "External id": 991784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941677521.036, "dur": 2.937, + "args": { + "External id": 991785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941677525.479, "dur": 0.753, + "args": { + "External id": 991786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941677552.738, "dur": 170.119, + "args": { + "External id": 991787,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941677572.373, "dur": 144.832, + "args": { + "External id": 991788,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941677589.138, "dur": 17.944, + "args": { + "External id": 991789,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941677611.563, "dur": 76.138, + "args": { + "External id": 991790,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941677614.786, "dur": 72.503, + "args": { + "External id": 991791,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941677619.831, "dur": 6.325, + "args": { + "External id": 991792,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941677628.492, "dur": 58.144, + "args": { + "External id": 991793,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9776 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2338708, "tid": 2379421, + "ts": 6345941677861.934, "dur": 735.747, + "args": { + "External id": 991794,"Record function id": 0, "Ev Idx": 9777 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338708, "tid": 2379421, + "ts": 6345941677882.366, "dur": 700.828, + "args": { + "External id": 991795,"Record function id": 0, "Ev Idx": 9778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941677956.507, "dur": 5.507, + "args": { + "External id": 991796,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941677979.712, "dur": 53.959, + "args": { + "External id": 991797,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941677985.708, "dur": 1.453, + "args": { + "External id": 991798,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941677990.101, "dur": 0.805, + "args": { + "External id": 991799,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941677992.083, "dur": 0.683, + "args": { + "External id": 991800,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941677994.467, "dur": 2.701, + "args": { + "External id": 991801,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941677998.881, "dur": 0.331, + "args": { + "External id": 991802,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678000.582, "dur": 0.879, + "args": { + "External id": 991803,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678003.036, "dur": 0.505, + "args": { + "External id": 991804,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678005.452, "dur": 0.622, + "args": { + "External id": 991805,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678007.219, "dur": 20.902, + "args": { + "External id": 991806,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941678047.854, "dur": 91.040, + "args": { + "External id": 991807,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941678182.756, "dur": 130.164, + "args": { + "External id": 991808,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941678195.832, "dur": 5.270, + "args": { + "External id": 991809,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941678208.162, "dur": 12.225, + "args": { + "External id": 991810,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941678212.831, "dur": 7.042, + "args": { + "External id": 991811,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678216.931, "dur": 1.062, + "args": { + "External id": 991812,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941678229.191, "dur": 27.986, + "args": { + "External id": 991813,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678231.378, "dur": 2.823, + "args": { + "External id": 991814,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678235.749, "dur": 0.444, + "args": { + "External id": 991815,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678237.968, "dur": 0.500, + "args": { + "External id": 991816,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678240.046, "dur": 0.956, + "args": { + "External id": 991817,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678242.058, "dur": 0.616, + "args": { + "External id": 991818,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678244.444, "dur": 0.517, + "args": { + "External id": 991819,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678246.669, "dur": 0.483, + "args": { + "External id": 991820,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678248.382, "dur": 0.392, + "args": { + "External id": 991821,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941678250.349, "dur": 2.889, + "args": { + "External id": 991822,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941678270.972, "dur": 32.848, + "args": { + "External id": 991823,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941678369.922, "dur": 135.298, + "args": { + "External id": 991824,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941678399.454, "dur": 101.646, + "args": { + "External id": 991825,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9808, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941678410.258, "dur": 86.158, + "args": { + "External id": 991826,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941678522.649, "dur": 2.220, + "args": { + "External id": 991827,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9810, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941678616.953, "dur": 2109.316, + "args": { + "External id": 991828,"Sequence number": 10552495, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9811 + } + }, + { + "ph": "f", "id": 424, "pid": 2338708, "tid": 2379421, "ts": 6345941678616.953, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941678745.098, "dur": 121.692, + "args": { + "External id": 991829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941678912.540, "dur": 49.359, + "args": { + "External id": 991830,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941678985.620, "dur": 127.760, + "args": { + "External id": 991831,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941679131.407, "dur": 44.725, + "args": { + "External id": 991832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941679184.859, "dur": 37.913, + "args": { + "External id": 991833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941679231.929, "dur": 34.685, + "args": { + "External id": 991834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941679278.088, "dur": 36.805, + "args": { + "External id": 991835,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941679345.290, "dur": 30.047, + "args": { + "External id": 991836,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941679398.269, "dur": 34.094, + "args": { + "External id": 991837,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941679458.055, "dur": 25.892, + "args": { + "External id": 991838,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941679502.175, "dur": 19.302, + "args": { + "External id": 991839,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941679534.676, "dur": 45.782, + "args": { + "External id": 991840,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941679585.261, "dur": 40.578, + "args": { + "External id": 991841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941679660.187, "dur": 324.104, + "args": { + "External id": 991842,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941679754.613, "dur": 8.319, + "args": { + "External id": 991843,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941679765.745, "dur": 14.689, + "args": { + "External id": 991844,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941679785.973, "dur": 4.283, + "args": { + "External id": 991845,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941679791.676, "dur": 4.877, + "args": { + "External id": 991846,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941679851.611, "dur": 6.425, + "args": { + "External id": 991847,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941679854.169, "dur": 3.624, + "args": { + "External id": 991848,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941679860.122, "dur": 35.411, + "args": { + "External id": 991849,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941679866.863, "dur": 2.192, + "args": { + "External id": 991850,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941679897.518, "dur": 7.957, + "args": { + "External id": 991851,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941679904.598, "dur": 0.777, + "args": { + "External id": 991852,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941679906.793, "dur": 16.685, + "args": { + "External id": 991853,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 9836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941679909.339, "dur": 0.602, + "args": { + "External id": 991854,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 9837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941680050.966, "dur": 77.099, + "args": { + "External id": 991855,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941680157.507, "dur": 21.221, + "args": { + "External id": 991856,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941680190.624, "dur": 62.247, + "args": { + "External id": 991857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941680261.680, "dur": 50.014, + "args": { + "External id": 991858,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941680322.769, "dur": 28.131, + "args": { + "External id": 991859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 9842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941680360.335, "dur": 38.518, + "args": { + "External id": 991860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 9843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941680407.942, "dur": 33.203, + "args": { + "External id": 991861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941680450.043, "dur": 35.599, + "args": { + "External id": 991862,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941680507.597, "dur": 28.919, + "args": { + "External id": 991863,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 9846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941680554.689, "dur": 31.370, + "args": { + "External id": 991864,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941680603.738, "dur": 19.726, + "args": { + "External id": 991865,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941680640.198, "dur": 16.008, + "args": { + "External id": 991866,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941680670.203, "dur": 18.069, + "args": { + "External id": 991867,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 9850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680776.424, "dur": 18.115, + "args": { + "External id": 991868,"Record function id": 0, "Ev Idx": 9851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680780.556, "dur": 12.949, + "args": { + "External id": 991869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680785.292, "dur": 7.297, + "args": { + "External id": 991870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680787.377, "dur": 5.060, + "args": { + "External id": 991871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680799.287, "dur": 5.594, + "args": { + "External id": 991872,"Record function id": 0, "Ev Idx": 9855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680800.844, "dur": 3.478, + "args": { + "External id": 991873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680801.628, "dur": 2.102, + "args": { + "External id": 991874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680802.523, "dur": 1.105, + "args": { + "External id": 991875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680809.021, "dur": 4.816, + "args": { + "External id": 991876,"Record function id": 0, "Ev Idx": 9859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680810.300, "dur": 3.043, + "args": { + "External id": 991877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680811.033, "dur": 1.589, + "args": { + "External id": 991878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680811.484, "dur": 1.053, + "args": { + "External id": 991879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 9862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680817.710, "dur": 4.537, + "args": { + "External id": 991880,"Record function id": 0, "Ev Idx": 9863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680819.034, "dur": 2.720, + "args": { + "External id": 991881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680819.705, "dur": 1.450, + "args": { + "External id": 991882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680820.264, "dur": 0.810, + "args": { + "External id": 991883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 9866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680826.071, "dur": 4.477, + "args": { + "External id": 991884,"Record function id": 0, "Ev Idx": 9867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680827.550, "dur": 2.492, + "args": { + "External id": 991885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680828.200, "dur": 1.188, + "args": { + "External id": 991886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680828.571, "dur": 0.732, + "args": { + "External id": 991887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680834.470, "dur": 7.090, + "args": { + "External id": 991888,"Record function id": 0, "Ev Idx": 9871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680835.620, "dur": 5.396, + "args": { + "External id": 991889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680836.242, "dur": 4.251, + "args": { + "External id": 991890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680836.923, "dur": 3.446, + "args": { + "External id": 991891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 9874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680845.608, "dur": 4.288, + "args": { + "External id": 991892,"Record function id": 0, "Ev Idx": 9875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680846.870, "dur": 2.527, + "args": { + "External id": 991893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680847.666, "dur": 1.254, + "args": { + "External id": 991894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680848.062, "dur": 0.760, + "args": { + "External id": 991895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680853.752, "dur": 4.199, + "args": { + "External id": 991896,"Record function id": 0, "Ev Idx": 9879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680855.038, "dur": 2.378, + "args": { + "External id": 991897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680855.733, "dur": 1.177, + "args": { + "External id": 991898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680856.091, "dur": 0.714, + "args": { + "External id": 991899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 9882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680862.343, "dur": 5.098, + "args": { + "External id": 991900,"Record function id": 0, "Ev Idx": 9883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941680863.749, "dur": 3.156, + "args": { + "External id": 991901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680864.558, "dur": 1.819, + "args": { + "External id": 991902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941680865.289, "dur": 0.949, + "args": { + "External id": 991903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 9886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941680873.197, "dur": 60945.808, + "args": { + "External id": 991904,"Record function id": 0, "Sequence number": 10552494, "Fwd thread id": 1, "Ev Idx": 9887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941680875.304, "dur": 60933.103, + "args": { + "External id": 991905,"Sequence number": 10552494, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9888 + } + }, + { + "ph": "f", "id": 425, "pid": 2338708, "tid": 2379421, "ts": 6345941680875.304, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2338708, "tid": 2379421, + "ts": 6345941680910.830, "dur": 43.774, + "args": { + "External id": 991906,"Record function id": 0, "Ev Idx": 9889 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2338708, "tid": 2379421, + "ts": 6345941680964.115, "dur": 144.401, + "args": { + "External id": 991907,"Record function id": 0, "Ev Idx": 9890 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2338708, "tid": 2379421, + "ts": 6345941681119.274, "dur": 60679.628, + "args": { + "External id": 991908,"Record function id": 0, "Ev Idx": 9891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941681229.619, "dur": 8.689, + "args": { + "External id": 991909,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941681250.970, "dur": 5.873, + "args": { + "External id": 991910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941681275.270, "dur": 59378.692, + "args": { + "External id": 991911,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941681291.712, "dur": 59346.467, + "args": { + "External id": 991912,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 9895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941681407.007, "dur": 24.391, + "args": { + "External id": 991913,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941681454.505, "dur": 59132.826, + "args": { + "External id": 991914,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 9897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941681459.379, "dur": 59126.723, + "args": { + "External id": 991915,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 9898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941681464.981, "dur": 9.466, + "args": { + "External id": 991916,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941681480.389, "dur": 59099.672, + "args": { + "External id": 991917,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 9900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941740781.009, "dur": 13.364, + "args": { + "External id": 991918,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 9901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941740784.599, "dur": 9.213, + "args": { + "External id": 991919,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941740830.744, "dur": 514.125, + "args": { + "External id": 991920,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 9903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941740867.451, "dur": 470.433, + "args": { + "External id": 991921,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9904, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941740880.052, "dur": 450.277, + "args": { + "External id": 991922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 9905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941741376.204, "dur": 3.022, + "args": { + "External id": 991923,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9906, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941741456.964, "dur": 7.833, + "args": { + "External id": 991924,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941741479.658, "dur": 40.904, + "args": { + "External id": 991925,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941741533.232, "dur": 4.249, + "args": { + "External id": 991926,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941741543.950, "dur": 14.153, + "args": { + "External id": 991927,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941741565.295, "dur": 1.260, + "args": { + "External id": 991928,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941741571.763, "dur": 12.953, + "args": { + "External id": 991929,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941741590.535, "dur": 1.145, + "args": { + "External id": 991930,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941741596.843, "dur": 11.603, + "args": { + "External id": 991931,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 9914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941741613.630, "dur": 1.245, + "args": { + "External id": 991932,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941741619.936, "dur": 13.529, + "args": { + "External id": 991933,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 9916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941741638.151, "dur": 1.661, + "args": { + "External id": 991934,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941741644.686, "dur": 13.436, + "args": { + "External id": 991935,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 9918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941741663.558, "dur": 1.139, + "args": { + "External id": 991936,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941741669.576, "dur": 13.203, + "args": { + "External id": 991937,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941741690.320, "dur": 0.922, + "args": { + "External id": 991938,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941741695.778, "dur": 12.848, + "args": { + "External id": 991939,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 9922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941741713.542, "dur": 0.824, + "args": { + "External id": 991940,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941741719.226, "dur": 12.177, + "args": { + "External id": 991941,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 9924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941741836.592, "dur": 3488.883, + "args": { + "External id": 991942,"Record function id": 0, "Ev Idx": 9925 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2338708, "tid": 2379421, + "ts": 6345941741860.775, "dur": 1321.445, + "args": { + "External id": 991943,"Record function id": 0, "Ev Idx": 9926 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338708, "tid": 2379421, + "ts": 6345941741878.643, "dur": 445.915, + "args": { + "External id": 991944,"Record function id": 0, "Ev Idx": 9927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941741977.925, "dur": 7.278, + "args": { + "External id": 991945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 9928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941741988.640, "dur": 1.053, + "args": { + "External id": 991946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941741991.966, "dur": 1.040, + "args": { + "External id": 991947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941741994.737, "dur": 1.009, + "args": { + "External id": 991948,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941741999.933, "dur": 1.230, + "args": { + "External id": 991949,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 9932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941742003.314, "dur": 1.147, + "args": { + "External id": 991950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 9933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941742006.389, "dur": 18.079, + "args": { + "External id": 991951,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 9934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941742029.065, "dur": 3.021, + "args": { + "External id": 991952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941742036.557, "dur": 2.866, + "args": { + "External id": 991953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941742041.159, "dur": 0.773, + "args": { + "External id": 991954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 9937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941742102.706, "dur": 180.841, + "args": { + "External id": 991955,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941742124.569, "dur": 152.985, + "args": { + "External id": 991956,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 9939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941742145.820, "dur": 16.299, + "args": { + "External id": 991957,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941742166.780, "dur": 79.989, + "args": { + "External id": 991958,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 9941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941742170.274, "dur": 75.935, + "args": { + "External id": 991959,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 9942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742175.232, "dur": 7.353, + "args": { + "External id": 991960,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941742184.662, "dur": 60.603, + "args": { + "External id": 991961,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 9944 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2338708, "tid": 2379421, + "ts": 6345941742432.303, "dur": 739.577, + "args": { + "External id": 991962,"Record function id": 0, "Ev Idx": 9945 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338708, "tid": 2379421, + "ts": 6345941742452.766, "dur": 702.634, + "args": { + "External id": 991963,"Record function id": 0, "Ev Idx": 9946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941742529.291, "dur": 6.213, + "args": { + "External id": 991964,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941742554.208, "dur": 31.813, + "args": { + "External id": 991965,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742560.045, "dur": 2.103, + "args": { + "External id": 991966,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742564.298, "dur": 0.957, + "args": { + "External id": 991967,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742567.113, "dur": 0.512, + "args": { + "External id": 991968,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742569.340, "dur": 2.233, + "args": { + "External id": 991969,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742572.578, "dur": 0.514, + "args": { + "External id": 991970,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742575.059, "dur": 0.523, + "args": { + "External id": 991971,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742577.222, "dur": 0.485, + "args": { + "External id": 991972,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742579.134, "dur": 0.479, + "args": { + "External id": 991973,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742581.436, "dur": 0.512, + "args": { + "External id": 991974,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941742598.486, "dur": 48.326, + "args": { + "External id": 991975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941742683.422, "dur": 128.433, + "args": { + "External id": 991976,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 9959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941742695.442, "dur": 3.521, + "args": { + "External id": 991977,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941742705.411, "dur": 11.915, + "args": { + "External id": 991978,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941742710.312, "dur": 6.517, + "args": { + "External id": 991979,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 9962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742714.535, "dur": 0.884, + "args": { + "External id": 991980,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 9963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941742728.617, "dur": 27.701, + "args": { + "External id": 991981,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 9964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742731.005, "dur": 2.942, + "args": { + "External id": 991982,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742735.532, "dur": 0.656, + "args": { + "External id": 991983,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742738.100, "dur": 0.474, + "args": { + "External id": 991984,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742740.617, "dur": 0.397, + "args": { + "External id": 991985,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742742.294, "dur": 0.455, + "args": { + "External id": 991986,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742744.439, "dur": 0.606, + "args": { + "External id": 991987,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742746.444, "dur": 0.316, + "args": { + "External id": 991988,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742747.792, "dur": 0.471, + "args": { + "External id": 991989,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941742749.702, "dur": 2.544, + "args": { + "External id": 991990,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 9973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941742768.710, "dur": 33.716, + "args": { + "External id": 991991,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 9974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941742863.843, "dur": 132.600, + "args": { + "External id": 991992,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 9975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941742894.034, "dur": 98.369, + "args": { + "External id": 991993,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9976, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941742904.579, "dur": 83.271, + "args": { + "External id": 991994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 9977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941743039.604, "dur": 3.296, + "args": { + "External id": 991995,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9978, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941743191.267, "dur": 2110.273, + "args": { + "External id": 991996,"Sequence number": 10552493, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 9979 + } + }, + { + "ph": "f", "id": 426, "pid": 2338708, "tid": 2379421, "ts": 6345941743191.267, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941743326.234, "dur": 125.482, + "args": { + "External id": 991997,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 9980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941743500.461, "dur": 47.020, + "args": { + "External id": 991998,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 9981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941743571.703, "dur": 58.412, + "args": { + "External id": 991999,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 9982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941743642.235, "dur": 37.498, + "args": { + "External id": 992000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941743687.268, "dur": 37.384, + "args": { + "External id": 992001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941743732.232, "dur": 33.247, + "args": { + "External id": 992002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 9985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941743776.416, "dur": 33.759, + "args": { + "External id": 992003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 9986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941743841.280, "dur": 27.806, + "args": { + "External id": 992004,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 9987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941743892.407, "dur": 35.562, + "args": { + "External id": 992005,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941743953.290, "dur": 22.985, + "args": { + "External id": 992006,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 9989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941743994.436, "dur": 38.948, + "args": { + "External id": 992007,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 9990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941744050.323, "dur": 92.867, + "args": { + "External id": 992008,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 9991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941744151.012, "dur": 43.478, + "args": { + "External id": 992009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 9992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941744247.116, "dur": 321.212, + "args": { + "External id": 992010,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 9993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941744350.371, "dur": 12.835, + "args": { + "External id": 992011,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941744365.800, "dur": 3.472, + "args": { + "External id": 992012,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941744370.882, "dur": 2.609, + "args": { + "External id": 992013,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941744375.018, "dur": 5.115, + "args": { + "External id": 992014,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941744437.531, "dur": 6.363, + "args": { + "External id": 992015,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941744439.837, "dur": 3.822, + "args": { + "External id": 992016,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 9999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941744452.463, "dur": 39.301, + "args": { + "External id": 992017,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941744459.503, "dur": 2.089, + "args": { + "External id": 992018,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941744493.751, "dur": 1.935, + "args": { + "External id": 992019,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941744494.727, "dur": 0.866, + "args": { + "External id": 992020,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941744497.131, "dur": 19.887, + "args": { + "External id": 992021,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941744499.826, "dur": 0.659, + "args": { + "External id": 992022,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941744612.848, "dur": 35.701, + "args": { + "External id": 992023,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941744669.200, "dur": 21.273, + "args": { + "External id": 992024,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941744700.619, "dur": 47.733, + "args": { + "External id": 992025,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941744756.720, "dur": 46.039, + "args": { + "External id": 992026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941744813.104, "dur": 28.485, + "args": { + "External id": 992027,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941744852.037, "dur": 39.874, + "args": { + "External id": 992028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941744900.611, "dur": 34.552, + "args": { + "External id": 992029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941744944.539, "dur": 37.246, + "args": { + "External id": 992030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941745002.725, "dur": 50.088, + "args": { + "External id": 992031,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941745112.775, "dur": 37.108, + "args": { + "External id": 992032,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941745170.106, "dur": 22.011, + "args": { + "External id": 992033,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941745211.600, "dur": 17.413, + "args": { + "External id": 992034,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941745243.733, "dur": 18.494, + "args": { + "External id": 992035,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745351.387, "dur": 19.134, + "args": { + "External id": 992036,"Record function id": 0, "Ev Idx": 10019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745354.947, "dur": 14.443, + "args": { + "External id": 992037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745360.291, "dur": 7.824, + "args": { + "External id": 992038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745362.472, "dur": 5.435, + "args": { + "External id": 992039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745375.354, "dur": 5.408, + "args": { + "External id": 992040,"Record function id": 0, "Ev Idx": 10023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745376.692, "dur": 3.473, + "args": { + "External id": 992041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745377.580, "dur": 2.034, + "args": { + "External id": 992042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745378.356, "dur": 1.155, + "args": { + "External id": 992043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745384.689, "dur": 5.805, + "args": { + "External id": 992044,"Record function id": 0, "Ev Idx": 10027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745386.232, "dur": 3.764, + "args": { + "External id": 992045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745387.266, "dur": 2.171, + "args": { + "External id": 992046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745388.020, "dur": 1.334, + "args": { + "External id": 992047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745394.455, "dur": 4.246, + "args": { + "External id": 992048,"Record function id": 0, "Ev Idx": 10031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745395.574, "dur": 2.633, + "args": { + "External id": 992049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745396.194, "dur": 1.499, + "args": { + "External id": 992050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745396.737, "dur": 0.875, + "args": { + "External id": 992051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745402.463, "dur": 4.557, + "args": { + "External id": 992052,"Record function id": 0, "Ev Idx": 10035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745403.733, "dur": 2.818, + "args": { + "External id": 992053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745404.315, "dur": 1.719, + "args": { + "External id": 992054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745404.912, "dur": 1.031, + "args": { + "External id": 992055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745410.840, "dur": 7.255, + "args": { + "External id": 992056,"Record function id": 0, "Ev Idx": 10039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745411.995, "dur": 5.604, + "args": { + "External id": 992057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745412.660, "dur": 4.300, + "args": { + "External id": 992058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745413.261, "dur": 3.586, + "args": { + "External id": 992059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745422.058, "dur": 4.493, + "args": { + "External id": 992060,"Record function id": 0, "Ev Idx": 10043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745423.492, "dur": 2.535, + "args": { + "External id": 992061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745424.253, "dur": 1.254, + "args": { + "External id": 992062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745424.676, "dur": 0.755, + "args": { + "External id": 992063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745430.314, "dur": 4.612, + "args": { + "External id": 992064,"Record function id": 0, "Ev Idx": 10047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745431.680, "dur": 2.760, + "args": { + "External id": 992065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745432.471, "dur": 1.436, + "args": { + "External id": 992066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745432.867, "dur": 0.911, + "args": { + "External id": 992067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745438.580, "dur": 4.609, + "args": { + "External id": 992068,"Record function id": 0, "Ev Idx": 10051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941745439.755, "dur": 2.956, + "args": { + "External id": 992069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745440.541, "dur": 1.621, + "args": { + "External id": 992070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941745441.195, "dur": 0.827, + "args": { + "External id": 992071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941745448.588, "dur": 66049.829, + "args": { + "External id": 992072,"Record function id": 0, "Sequence number": 10552492, "Fwd thread id": 1, "Ev Idx": 10055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941745450.346, "dur": 66035.766, + "args": { + "External id": 992073,"Sequence number": 10552492, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10056 + } + }, + { + "ph": "f", "id": 427, "pid": 2338708, "tid": 2379421, "ts": 6345941745450.346, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2338708, "tid": 2379421, + "ts": 6345941745485.807, "dur": 44.560, + "args": { + "External id": 992074,"Record function id": 0, "Ev Idx": 10057 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2338708, "tid": 2379421, + "ts": 6345941745539.702, "dur": 79.472, + "args": { + "External id": 992075,"Record function id": 0, "Ev Idx": 10058 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2338708, "tid": 2379421, + "ts": 6345941745626.148, "dur": 65850.209, + "args": { + "External id": 992076,"Record function id": 0, "Ev Idx": 10059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941745731.059, "dur": 8.309, + "args": { + "External id": 992077,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941745750.504, "dur": 5.604, + "args": { + "External id": 992078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941745772.690, "dur": 64601.571, + "args": { + "External id": 992079,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941745789.608, "dur": 64569.148, + "args": { + "External id": 992080,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941745920.890, "dur": 26.931, + "args": { + "External id": 992081,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941745972.385, "dur": 64340.368, + "args": { + "External id": 992082,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941745975.469, "dur": 64336.357, + "args": { + "External id": 992083,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941745981.237, "dur": 14.183, + "args": { + "External id": 992084,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941745997.929, "dur": 64309.957, + "args": { + "External id": 992085,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941810499.316, "dur": 12.891, + "args": { + "External id": 992086,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941810503.186, "dur": 8.488, + "args": { + "External id": 992087,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941810548.727, "dur": 409.901, + "args": { + "External id": 992088,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941810586.094, "dur": 366.540, + "args": { + "External id": 992089,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10072, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941810599.366, "dur": 345.301, + "args": { + "External id": 992090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941810992.035, "dur": 2.492, + "args": { + "External id": 992091,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10074, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941811125.163, "dur": 9.880, + "args": { + "External id": 992092,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941811150.294, "dur": 44.246, + "args": { + "External id": 992093,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941811206.884, "dur": 1.877, + "args": { + "External id": 992094,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941811215.731, "dur": 14.501, + "args": { + "External id": 992095,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941811236.284, "dur": 1.311, + "args": { + "External id": 992096,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941811242.526, "dur": 13.719, + "args": { + "External id": 992097,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941811262.082, "dur": 1.060, + "args": { + "External id": 992098,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941811267.920, "dur": 14.374, + "args": { + "External id": 992099,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941811287.659, "dur": 1.009, + "args": { + "External id": 992100,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941811292.791, "dur": 13.831, + "args": { + "External id": 992101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941811311.245, "dur": 0.986, + "args": { + "External id": 992102,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941811317.433, "dur": 13.144, + "args": { + "External id": 992103,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941811335.524, "dur": 1.298, + "args": { + "External id": 992104,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941811341.779, "dur": 12.575, + "args": { + "External id": 992105,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941811359.910, "dur": 0.720, + "args": { + "External id": 992106,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941811364.996, "dur": 11.678, + "args": { + "External id": 992107,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941811384.202, "dur": 3.735, + "args": { + "External id": 992108,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941811392.020, "dur": 14.559, + "args": { + "External id": 992109,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941811518.603, "dur": 3367.029, + "args": { + "External id": 992110,"Record function id": 0, "Ev Idx": 10093 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2338708, "tid": 2379421, + "ts": 6345941811543.343, "dur": 1216.904, + "args": { + "External id": 992111,"Record function id": 0, "Ev Idx": 10094 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338708, "tid": 2379421, + "ts": 6345941811560.829, "dur": 363.019, + "args": { + "External id": 992112,"Record function id": 0, "Ev Idx": 10095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941811661.945, "dur": 5.168, + "args": { + "External id": 992113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941811671.247, "dur": 1.147, + "args": { + "External id": 992114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941811674.454, "dur": 1.304, + "args": { + "External id": 992115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941811678.314, "dur": 1.243, + "args": { + "External id": 992116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941811681.210, "dur": 1.070, + "args": { + "External id": 992117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941811683.849, "dur": 1.278, + "args": { + "External id": 992118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941811689.043, "dur": 0.992, + "args": { + "External id": 992119,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941811691.583, "dur": 4.677, + "args": { + "External id": 992120,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941811697.872, "dur": 0.771, + "args": { + "External id": 992121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941811700.610, "dur": 0.893, + "args": { + "External id": 992122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941811724.310, "dur": 166.370, + "args": { + "External id": 992123,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941811743.723, "dur": 141.267, + "args": { + "External id": 992124,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941811761.335, "dur": 17.165, + "args": { + "External id": 992125,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941811782.504, "dur": 75.582, + "args": { + "External id": 992126,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941811785.814, "dur": 71.921, + "args": { + "External id": 992127,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941811790.755, "dur": 7.090, + "args": { + "External id": 992128,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941811800.048, "dur": 56.928, + "args": { + "External id": 992129,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10112 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2338708, "tid": 2379421, + "ts": 6345941812051.457, "dur": 698.753, + "args": { + "External id": 992130,"Record function id": 0, "Ev Idx": 10113 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338708, "tid": 2379421, + "ts": 6345941812114.075, "dur": 620.715, + "args": { + "External id": 992131,"Record function id": 0, "Ev Idx": 10114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941812190.294, "dur": 8.015, + "args": { + "External id": 992132,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941812216.764, "dur": 33.218, + "args": { + "External id": 992133,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812222.845, "dur": 1.949, + "args": { + "External id": 992134,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812227.575, "dur": 0.605, + "args": { + "External id": 992135,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812229.838, "dur": 2.442, + "args": { + "External id": 992136,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812233.554, "dur": 0.548, + "args": { + "External id": 992137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812236.527, "dur": 0.557, + "args": { + "External id": 992138,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812238.791, "dur": 0.436, + "args": { + "External id": 992139,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812240.282, "dur": 0.781, + "args": { + "External id": 992140,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812242.885, "dur": 0.522, + "args": { + "External id": 992141,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812244.988, "dur": 0.385, + "args": { + "External id": 992142,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941812263.174, "dur": 50.226, + "args": { + "External id": 992143,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941812349.721, "dur": 125.269, + "args": { + "External id": 992144,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941812361.703, "dur": 3.716, + "args": { + "External id": 992145,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941812372.326, "dur": 13.653, + "args": { + "External id": 992146,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941812377.356, "dur": 8.152, + "args": { + "External id": 992147,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812381.634, "dur": 2.578, + "args": { + "External id": 992148,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941812394.525, "dur": 25.194, + "args": { + "External id": 992149,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812396.781, "dur": 0.757, + "args": { + "External id": 992150,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812399.350, "dur": 0.917, + "args": { + "External id": 992151,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812401.606, "dur": 0.830, + "args": { + "External id": 992152,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812404.211, "dur": 0.456, + "args": { + "External id": 992153,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812406.502, "dur": 0.330, + "args": { + "External id": 992154,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812407.980, "dur": 0.385, + "args": { + "External id": 992155,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812409.886, "dur": 0.452, + "args": { + "External id": 992156,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812411.812, "dur": 2.528, + "args": { + "External id": 992157,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941812415.393, "dur": 0.677, + "args": { + "External id": 992158,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941812431.509, "dur": 33.586, + "args": { + "External id": 992159,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941812526.635, "dur": 128.345, + "args": { + "External id": 992160,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941812553.598, "dur": 97.436, + "args": { + "External id": 992161,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10144, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941812566.280, "dur": 79.736, + "args": { + "External id": 992162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941812674.489, "dur": 2.387, + "args": { + "External id": 992163,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10146, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941812768.484, "dur": 2092.873, + "args": { + "External id": 992164,"Sequence number": 10552491, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10147 + } + }, + { + "ph": "f", "id": 428, "pid": 2338708, "tid": 2379421, "ts": 6345941812768.484, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941812895.760, "dur": 141.747, + "args": { + "External id": 992165,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941813134.254, "dur": 51.494, + "args": { + "External id": 992166,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941813211.384, "dur": 66.404, + "args": { + "External id": 992167,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941813291.262, "dur": 37.766, + "args": { + "External id": 992168,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941813337.129, "dur": 37.701, + "args": { + "External id": 992169,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941813382.868, "dur": 32.442, + "args": { + "External id": 992170,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941813426.400, "dur": 34.486, + "args": { + "External id": 992171,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941813492.320, "dur": 28.734, + "args": { + "External id": 992172,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941813544.570, "dur": 33.139, + "args": { + "External id": 992173,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941813603.460, "dur": 23.033, + "args": { + "External id": 992174,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941813643.006, "dur": 18.007, + "args": { + "External id": 992175,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941813674.148, "dur": 46.687, + "args": { + "External id": 992176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941813725.741, "dur": 39.167, + "args": { + "External id": 992177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941813806.706, "dur": 396.424, + "args": { + "External id": 992178,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941813899.901, "dur": 7.412, + "args": { + "External id": 992179,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941813909.670, "dur": 3.485, + "args": { + "External id": 992180,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941813915.454, "dur": 5.030, + "args": { + "External id": 992181,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941813921.766, "dur": 2.372, + "args": { + "External id": 992182,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941813990.591, "dur": 7.170, + "args": { + "External id": 992183,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941813993.072, "dur": 4.094, + "args": { + "External id": 992184,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941814000.184, "dur": 105.333, + "args": { + "External id": 992185,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941814026.743, "dur": 3.108, + "args": { + "External id": 992186,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941814109.161, "dur": 3.316, + "args": { + "External id": 992187,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941814110.897, "dur": 1.407, + "args": { + "External id": 992188,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941814113.583, "dur": 21.865, + "args": { + "External id": 992189,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941814116.879, "dur": 1.109, + "args": { + "External id": 992190,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941814249.824, "dur": 35.527, + "args": { + "External id": 992191,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941814306.403, "dur": 20.609, + "args": { + "External id": 992192,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941814338.018, "dur": 61.189, + "args": { + "External id": 992193,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941814408.308, "dur": 47.137, + "args": { + "External id": 992194,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941814466.134, "dur": 26.154, + "args": { + "External id": 992195,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941814502.318, "dur": 36.212, + "args": { + "External id": 992196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941814547.455, "dur": 33.953, + "args": { + "External id": 992197,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941814590.801, "dur": 35.418, + "args": { + "External id": 992198,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941814647.299, "dur": 28.337, + "args": { + "External id": 992199,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941814694.699, "dur": 28.818, + "args": { + "External id": 992200,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941814739.727, "dur": 19.120, + "args": { + "External id": 992201,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941814775.479, "dur": 16.565, + "args": { + "External id": 992202,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941814806.105, "dur": 18.048, + "args": { + "External id": 992203,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814911.683, "dur": 17.866, + "args": { + "External id": 992204,"Record function id": 0, "Ev Idx": 10187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814915.528, "dur": 12.958, + "args": { + "External id": 992205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814920.091, "dur": 7.217, + "args": { + "External id": 992206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814922.590, "dur": 4.525, + "args": { + "External id": 992207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814934.481, "dur": 6.559, + "args": { + "External id": 992208,"Record function id": 0, "Ev Idx": 10191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814936.334, "dur": 4.171, + "args": { + "External id": 992209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814937.654, "dur": 2.219, + "args": { + "External id": 992210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814938.633, "dur": 1.135, + "args": { + "External id": 992211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814944.955, "dur": 5.748, + "args": { + "External id": 992212,"Record function id": 0, "Ev Idx": 10195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814946.371, "dur": 3.818, + "args": { + "External id": 992213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814947.343, "dur": 2.337, + "args": { + "External id": 992214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814948.173, "dur": 1.411, + "args": { + "External id": 992215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814954.713, "dur": 4.637, + "args": { + "External id": 992216,"Record function id": 0, "Ev Idx": 10199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814956.221, "dur": 2.586, + "args": { + "External id": 992217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814957.074, "dur": 1.239, + "args": { + "External id": 992218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814957.486, "dur": 0.698, + "args": { + "External id": 992219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814963.226, "dur": 5.387, + "args": { + "External id": 992220,"Record function id": 0, "Ev Idx": 10203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814965.228, "dur": 2.861, + "args": { + "External id": 992221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814965.875, "dur": 1.712, + "args": { + "External id": 992222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814966.661, "dur": 0.835, + "args": { + "External id": 992223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814972.531, "dur": 7.836, + "args": { + "External id": 992224,"Record function id": 0, "Ev Idx": 10207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814973.860, "dur": 5.949, + "args": { + "External id": 992225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814974.693, "dur": 1.576, + "args": { + "External id": 992226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814975.227, "dur": 0.938, + "args": { + "External id": 992227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814984.423, "dur": 6.145, + "args": { + "External id": 992228,"Record function id": 0, "Ev Idx": 10211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814985.624, "dur": 4.433, + "args": { + "External id": 992229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814986.366, "dur": 3.197, + "args": { + "External id": 992230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814986.697, "dur": 2.769, + "args": { + "External id": 992231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814994.360, "dur": 4.587, + "args": { + "External id": 992232,"Record function id": 0, "Ev Idx": 10215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941814995.939, "dur": 2.456, + "args": { + "External id": 992233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814996.548, "dur": 1.327, + "args": { + "External id": 992234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941814996.875, "dur": 0.886, + "args": { + "External id": 992235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941815002.752, "dur": 21.456, + "args": { + "External id": 992236,"Record function id": 0, "Ev Idx": 10219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941815003.993, "dur": 2.718, + "args": { + "External id": 992237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941815004.693, "dur": 1.488, + "args": { + "External id": 992238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941815005.259, "dur": 0.777, + "args": { + "External id": 992239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941815033.271, "dur": 68477.937, + "args": { + "External id": 992240,"Record function id": 0, "Sequence number": 10552490, "Fwd thread id": 1, "Ev Idx": 10223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941815038.348, "dur": 68461.648, + "args": { + "External id": 992241,"Sequence number": 10552490, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10224 + } + }, + { + "ph": "f", "id": 429, "pid": 2338708, "tid": 2379421, "ts": 6345941815038.348, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2338708, "tid": 2379421, + "ts": 6345941815112.992, "dur": 49.429, + "args": { + "External id": 992242,"Record function id": 0, "Ev Idx": 10225 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2338708, "tid": 2379421, + "ts": 6345941815172.976, "dur": 82.517, + "args": { + "External id": 992243,"Record function id": 0, "Ev Idx": 10226 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2338708, "tid": 2379421, + "ts": 6345941815263.410, "dur": 68227.020, + "args": { + "External id": 992244,"Record function id": 0, "Ev Idx": 10227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941815372.981, "dur": 8.892, + "args": { + "External id": 992245,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941815394.918, "dur": 6.103, + "args": { + "External id": 992246,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941815418.415, "dur": 66967.427, + "args": { + "External id": 992247,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941815435.465, "dur": 66935.083, + "args": { + "External id": 992248,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941815545.602, "dur": 19.917, + "args": { + "External id": 992249,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941815588.908, "dur": 66733.285, + "args": { + "External id": 992250,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941815592.138, "dur": 66728.892, + "args": { + "External id": 992251,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941815598.122, "dur": 9.861, + "args": { + "External id": 992252,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941815610.132, "dur": 66705.002, + "args": { + "External id": 992253,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941882511.751, "dur": 13.293, + "args": { + "External id": 992254,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941882515.417, "dur": 9.163, + "args": { + "External id": 992255,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941882559.950, "dur": 420.020, + "args": { + "External id": 992256,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941882623.914, "dur": 350.061, + "args": { + "External id": 992257,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10240, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941882637.167, "dur": 330.632, + "args": { + "External id": 992258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941883004.502, "dur": 21.917, + "args": { + "External id": 992259,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10242, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941883144.950, "dur": 8.522, + "args": { + "External id": 992260,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941883168.272, "dur": 44.881, + "args": { + "External id": 992261,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941883226.011, "dur": 4.231, + "args": { + "External id": 992262,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941883237.737, "dur": 15.112, + "args": { + "External id": 992263,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941883259.935, "dur": 1.154, + "args": { + "External id": 992264,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941883266.312, "dur": 14.608, + "args": { + "External id": 992265,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941883286.387, "dur": 1.175, + "args": { + "External id": 992266,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941883292.043, "dur": 12.826, + "args": { + "External id": 992267,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941883309.650, "dur": 1.013, + "args": { + "External id": 992268,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941883314.979, "dur": 11.931, + "args": { + "External id": 992269,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941883331.705, "dur": 1.242, + "args": { + "External id": 992270,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941883337.319, "dur": 11.211, + "args": { + "External id": 992271,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941883353.540, "dur": 1.042, + "args": { + "External id": 992272,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941883359.364, "dur": 12.075, + "args": { + "External id": 992273,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941883377.172, "dur": 1.057, + "args": { + "External id": 992274,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941883383.004, "dur": 11.265, + "args": { + "External id": 992275,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941883401.378, "dur": 1.003, + "args": { + "External id": 992276,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941883406.823, "dur": 12.184, + "args": { + "External id": 992277,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941883530.081, "dur": 3443.564, + "args": { + "External id": 992278,"Record function id": 0, "Ev Idx": 10261 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2338708, "tid": 2379421, + "ts": 6345941883554.650, "dur": 1258.692, + "args": { + "External id": 992279,"Record function id": 0, "Ev Idx": 10262 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338708, "tid": 2379421, + "ts": 6345941883573.943, "dur": 366.453, + "args": { + "External id": 992280,"Record function id": 0, "Ev Idx": 10263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941883675.476, "dur": 7.818, + "args": { + "External id": 992281,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941883687.514, "dur": 1.116, + "args": { + "External id": 992282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941883690.937, "dur": 1.166, + "args": { + "External id": 992283,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941883694.194, "dur": 1.207, + "args": { + "External id": 992284,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941883697.095, "dur": 1.019, + "args": { + "External id": 992285,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941883699.722, "dur": 0.968, + "args": { + "External id": 992286,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941883703.063, "dur": 0.710, + "args": { + "External id": 992287,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941883705.451, "dur": 2.493, + "args": { + "External id": 992288,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941883709.457, "dur": 2.992, + "args": { + "External id": 992289,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941883716.345, "dur": 0.951, + "args": { + "External id": 992290,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941883738.695, "dur": 167.688, + "args": { + "External id": 992291,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941883757.495, "dur": 143.402, + "args": { + "External id": 992292,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941883775.351, "dur": 18.154, + "args": { + "External id": 992293,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941883797.263, "dur": 75.952, + "args": { + "External id": 992294,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941883800.707, "dur": 72.102, + "args": { + "External id": 992295,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941883805.741, "dur": 7.170, + "args": { + "External id": 992296,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941883814.944, "dur": 57.233, + "args": { + "External id": 992297,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10280 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2338708, "tid": 2379421, + "ts": 6345941884114.103, "dur": 689.265, + "args": { + "External id": 992298,"Record function id": 0, "Ev Idx": 10281 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338708, "tid": 2379421, + "ts": 6345941884136.918, "dur": 651.159, + "args": { + "External id": 992299,"Record function id": 0, "Ev Idx": 10282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941884213.544, "dur": 7.314, + "args": { + "External id": 992300,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941884239.903, "dur": 32.953, + "args": { + "External id": 992301,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884246.013, "dur": 2.288, + "args": { + "External id": 992302,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884250.836, "dur": 0.507, + "args": { + "External id": 992303,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884253.189, "dur": 0.871, + "args": { + "External id": 992304,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884255.523, "dur": 2.576, + "args": { + "External id": 992305,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884259.727, "dur": 0.502, + "args": { + "External id": 992306,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884261.911, "dur": 0.312, + "args": { + "External id": 992307,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884263.609, "dur": 0.556, + "args": { + "External id": 992308,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884265.979, "dur": 0.493, + "args": { + "External id": 992309,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884268.026, "dur": 0.337, + "args": { + "External id": 992310,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941884294.718, "dur": 59.521, + "args": { + "External id": 992311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941884391.369, "dur": 129.839, + "args": { + "External id": 992312,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941884403.961, "dur": 3.724, + "args": { + "External id": 992313,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941884414.520, "dur": 12.618, + "args": { + "External id": 992314,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941884419.672, "dur": 6.970, + "args": { + "External id": 992315,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884424.109, "dur": 0.947, + "args": { + "External id": 992316,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941884435.327, "dur": 28.853, + "args": { + "External id": 992317,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884437.792, "dur": 2.940, + "args": { + "External id": 992318,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884442.441, "dur": 0.596, + "args": { + "External id": 992319,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884444.573, "dur": 0.870, + "args": { + "External id": 992320,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884447.486, "dur": 0.578, + "args": { + "External id": 992321,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884449.813, "dur": 0.286, + "args": { + "External id": 992322,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884451.116, "dur": 0.493, + "args": { + "External id": 992323,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884453.649, "dur": 0.414, + "args": { + "External id": 992324,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884455.393, "dur": 0.650, + "args": { + "External id": 992325,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941884457.449, "dur": 2.751, + "args": { + "External id": 992326,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941884475.819, "dur": 36.621, + "args": { + "External id": 992327,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941884572.513, "dur": 134.510, + "args": { + "External id": 992328,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941884603.060, "dur": 100.033, + "args": { + "External id": 992329,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10312, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941884613.841, "dur": 84.455, + "args": { + "External id": 992330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941884726.537, "dur": 2.421, + "args": { + "External id": 992331,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10314, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941884821.545, "dur": 2127.883, + "args": { + "External id": 992332,"Sequence number": 10552489, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10315 + } + }, + { + "ph": "f", "id": 430, "pid": 2338708, "tid": 2379421, "ts": 6345941884821.545, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941884950.598, "dur": 186.910, + "args": { + "External id": 992333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941885193.825, "dur": 47.747, + "args": { + "External id": 992334,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941885265.889, "dur": 65.730, + "args": { + "External id": 992335,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941885343.836, "dur": 37.045, + "args": { + "External id": 992336,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941885388.185, "dur": 36.029, + "args": { + "External id": 992337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941885431.952, "dur": 31.803, + "args": { + "External id": 992338,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941885474.555, "dur": 33.838, + "args": { + "External id": 992339,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941885538.249, "dur": 28.282, + "args": { + "External id": 992340,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941885588.480, "dur": 36.245, + "args": { + "External id": 992341,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941885650.616, "dur": 23.520, + "args": { + "External id": 992342,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941885688.363, "dur": 19.682, + "args": { + "External id": 992343,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941885720.576, "dur": 43.834, + "args": { + "External id": 992344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941885768.719, "dur": 38.482, + "args": { + "External id": 992345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941885843.985, "dur": 409.204, + "args": { + "External id": 992346,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941885954.892, "dur": 9.522, + "args": { + "External id": 992347,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941885967.435, "dur": 3.205, + "args": { + "External id": 992348,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941885972.198, "dur": 2.512, + "args": { + "External id": 992349,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941885976.102, "dur": 4.934, + "args": { + "External id": 992350,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941886112.793, "dur": 10.772, + "args": { + "External id": 992351,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941886119.579, "dur": 3.552, + "args": { + "External id": 992352,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941886126.048, "dur": 42.494, + "args": { + "External id": 992353,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941886133.608, "dur": 2.217, + "args": { + "External id": 992354,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941886170.704, "dur": 2.507, + "args": { + "External id": 992355,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941886172.102, "dur": 1.021, + "args": { + "External id": 992356,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941886174.505, "dur": 18.122, + "args": { + "External id": 992357,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941886176.904, "dur": 0.709, + "args": { + "External id": 992358,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941886302.131, "dur": 34.306, + "args": { + "External id": 992359,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941886358.711, "dur": 19.553, + "args": { + "External id": 992360,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941886389.890, "dur": 63.082, + "args": { + "External id": 992361,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941886462.324, "dur": 48.707, + "args": { + "External id": 992362,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941886524.622, "dur": 25.682, + "args": { + "External id": 992363,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941886561.297, "dur": 40.387, + "args": { + "External id": 992364,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941886610.861, "dur": 34.378, + "args": { + "External id": 992365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941886654.759, "dur": 37.066, + "args": { + "External id": 992366,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941886714.246, "dur": 30.280, + "args": { + "External id": 992367,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941886763.350, "dur": 30.970, + "args": { + "External id": 992368,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941886812.588, "dur": 21.152, + "args": { + "External id": 992369,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941886851.057, "dur": 19.410, + "args": { + "External id": 992370,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941886889.907, "dur": 21.043, + "args": { + "External id": 992371,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887000.455, "dur": 40.783, + "args": { + "External id": 992372,"Record function id": 0, "Ev Idx": 10355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887004.447, "dur": 34.963, + "args": { + "External id": 992373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887029.410, "dur": 8.349, + "args": { + "External id": 992374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887031.846, "dur": 5.501, + "args": { + "External id": 992375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887048.655, "dur": 44.946, + "args": { + "External id": 992376,"Record function id": 0, "Ev Idx": 10359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887050.710, "dur": 4.593, + "args": { + "External id": 992377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887052.052, "dur": 2.397, + "args": { + "External id": 992378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887053.167, "dur": 1.185, + "args": { + "External id": 992379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887101.610, "dur": 8.339, + "args": { + "External id": 992380,"Record function id": 0, "Ev Idx": 10363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887103.839, "dur": 5.506, + "args": { + "External id": 992381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887105.544, "dur": 2.750, + "args": { + "External id": 992382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887106.424, "dur": 1.779, + "args": { + "External id": 992383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887113.983, "dur": 4.721, + "args": { + "External id": 992384,"Record function id": 0, "Ev Idx": 10367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887115.168, "dur": 2.998, + "args": { + "External id": 992385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887115.822, "dur": 1.740, + "args": { + "External id": 992386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887116.414, "dur": 1.052, + "args": { + "External id": 992387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887122.582, "dur": 4.713, + "args": { + "External id": 992388,"Record function id": 0, "Ev Idx": 10371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887123.961, "dur": 2.747, + "args": { + "External id": 992389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887124.801, "dur": 1.262, + "args": { + "External id": 992390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887125.204, "dur": 0.780, + "args": { + "External id": 992391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887131.401, "dur": 7.878, + "args": { + "External id": 992392,"Record function id": 0, "Ev Idx": 10375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887132.883, "dur": 5.868, + "args": { + "External id": 992393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887133.508, "dur": 4.542, + "args": { + "External id": 992394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887134.281, "dur": 3.635, + "args": { + "External id": 992395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887143.319, "dur": 4.735, + "args": { + "External id": 992396,"Record function id": 0, "Ev Idx": 10379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887144.721, "dur": 2.771, + "args": { + "External id": 992397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887145.467, "dur": 1.307, + "args": { + "External id": 992398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887145.817, "dur": 0.880, + "args": { + "External id": 992399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887151.964, "dur": 4.907, + "args": { + "External id": 992400,"Record function id": 0, "Ev Idx": 10383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887153.499, "dur": 2.838, + "args": { + "External id": 992401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887154.191, "dur": 1.379, + "args": { + "External id": 992402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887154.572, "dur": 0.904, + "args": { + "External id": 992403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887161.238, "dur": 6.095, + "args": { + "External id": 992404,"Record function id": 0, "Ev Idx": 10387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941887162.874, "dur": 3.912, + "args": { + "External id": 992405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887163.874, "dur": 2.246, + "args": { + "External id": 992406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941887164.932, "dur": 1.025, + "args": { + "External id": 992407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941887172.743, "dur": 68435.434, + "args": { + "External id": 992408,"Record function id": 0, "Sequence number": 10552488, "Fwd thread id": 1, "Ev Idx": 10391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941887174.407, "dur": 68423.423, + "args": { + "External id": 992409,"Sequence number": 10552488, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10392 + } + }, + { + "ph": "f", "id": 431, "pid": 2338708, "tid": 2379421, "ts": 6345941887174.407, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2338708, "tid": 2379421, + "ts": 6345941887212.107, "dur": 48.211, + "args": { + "External id": 992410,"Record function id": 0, "Ev Idx": 10393 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2338708, "tid": 2379421, + "ts": 6345941887270.065, "dur": 84.592, + "args": { + "External id": 992411,"Record function id": 0, "Ev Idx": 10394 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2338708, "tid": 2379421, + "ts": 6345941887361.788, "dur": 68226.598, + "args": { + "External id": 992412,"Record function id": 0, "Ev Idx": 10395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941887471.951, "dur": 8.796, + "args": { + "External id": 992413,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941887492.844, "dur": 5.852, + "args": { + "External id": 992414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941887516.870, "dur": 66964.024, + "args": { + "External id": 992415,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941887533.175, "dur": 66932.688, + "args": { + "External id": 992416,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941887659.788, "dur": 23.441, + "args": { + "External id": 992417,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941887710.943, "dur": 66709.589, + "args": { + "External id": 992418,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941887714.002, "dur": 66705.884, + "args": { + "External id": 992419,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941887719.351, "dur": 11.339, + "args": { + "External id": 992420,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941887733.144, "dur": 66683.876, + "args": { + "External id": 992421,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941954609.344, "dur": 13.174, + "args": { + "External id": 992422,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941954613.087, "dur": 8.964, + "args": { + "External id": 992423,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941954657.494, "dur": 470.428, + "args": { + "External id": 992424,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941954694.311, "dur": 427.465, + "args": { + "External id": 992425,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10408, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941954708.048, "dur": 405.857, + "args": { + "External id": 992426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941955154.419, "dur": 3.219, + "args": { + "External id": 992427,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10410, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941955235.724, "dur": 7.630, + "args": { + "External id": 992428,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941955258.116, "dur": 45.861, + "args": { + "External id": 992429,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941955316.189, "dur": 2.199, + "args": { + "External id": 992430,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941955325.594, "dur": 14.718, + "args": { + "External id": 992431,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941955347.341, "dur": 3.727, + "args": { + "External id": 992432,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941955356.731, "dur": 14.935, + "args": { + "External id": 992433,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941955377.756, "dur": 1.316, + "args": { + "External id": 992434,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941955383.525, "dur": 12.590, + "args": { + "External id": 992435,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941955401.168, "dur": 1.182, + "args": { + "External id": 992436,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941955406.891, "dur": 12.289, + "args": { + "External id": 992437,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941955424.171, "dur": 1.481, + "args": { + "External id": 992438,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941955430.565, "dur": 11.932, + "args": { + "External id": 992439,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941955447.980, "dur": 0.885, + "args": { + "External id": 992440,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941955453.856, "dur": 12.065, + "args": { + "External id": 992441,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941955474.120, "dur": 1.125, + "args": { + "External id": 992442,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941955480.056, "dur": 12.556, + "args": { + "External id": 992443,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941955498.490, "dur": 0.897, + "args": { + "External id": 992444,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345941955504.443, "dur": 14.078, + "args": { + "External id": 992445,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941955625.734, "dur": 3506.374, + "args": { + "External id": 992446,"Record function id": 0, "Ev Idx": 10429 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2338708, "tid": 2379421, + "ts": 6345941955650.411, "dur": 1243.896, + "args": { + "External id": 992447,"Record function id": 0, "Ev Idx": 10430 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338708, "tid": 2379421, + "ts": 6345941955670.080, "dur": 440.541, + "args": { + "External id": 992448,"Record function id": 0, "Ev Idx": 10431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941955772.664, "dur": 5.245, + "args": { + "External id": 992449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941955781.453, "dur": 3.704, + "args": { + "External id": 992450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941955787.584, "dur": 1.188, + "args": { + "External id": 992451,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941955790.792, "dur": 1.133, + "args": { + "External id": 992452,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941955794.191, "dur": 0.928, + "args": { + "External id": 992453,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941955799.391, "dur": 1.055, + "args": { + "External id": 992454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941955802.400, "dur": 1.071, + "args": { + "External id": 992455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941955805.351, "dur": 2.255, + "args": { + "External id": 992456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941955809.146, "dur": 1.108, + "args": { + "External id": 992457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941955814.204, "dur": 2.902, + "args": { + "External id": 992458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941955839.119, "dur": 191.699, + "args": { + "External id": 992459,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941955857.578, "dur": 146.922, + "args": { + "External id": 992460,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941955875.709, "dur": 17.860, + "args": { + "External id": 992461,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941955898.004, "dur": 75.181, + "args": { + "External id": 992462,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941955902.021, "dur": 70.763, + "args": { + "External id": 992463,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941955906.573, "dur": 6.188, + "args": { + "External id": 992464,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941955914.983, "dur": 57.019, + "args": { + "External id": 992465,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10448 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2338708, "tid": 2379421, + "ts": 6345941956224.345, "dur": 660.518, + "args": { + "External id": 992466,"Record function id": 0, "Ev Idx": 10449 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338708, "tid": 2379421, + "ts": 6345941956244.543, "dur": 625.784, + "args": { + "External id": 992467,"Record function id": 0, "Ev Idx": 10450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941956324.452, "dur": 7.034, + "args": { + "External id": 992468,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941956349.920, "dur": 33.947, + "args": { + "External id": 992469,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956356.534, "dur": 2.123, + "args": { + "External id": 992470,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956361.108, "dur": 0.459, + "args": { + "External id": 992471,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956363.470, "dur": 0.576, + "args": { + "External id": 992472,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956365.104, "dur": 0.542, + "args": { + "External id": 992473,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956367.355, "dur": 2.877, + "args": { + "External id": 992474,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956371.926, "dur": 0.719, + "args": { + "External id": 992475,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956373.679, "dur": 0.474, + "args": { + "External id": 992476,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956376.216, "dur": 0.589, + "args": { + "External id": 992477,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956378.667, "dur": 0.403, + "args": { + "External id": 992478,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941956396.659, "dur": 50.906, + "args": { + "External id": 992479,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345941956482.836, "dur": 125.973, + "args": { + "External id": 992480,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941956494.817, "dur": 3.511, + "args": { + "External id": 992481,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345941956504.639, "dur": 11.814, + "args": { + "External id": 992482,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345941956509.865, "dur": 6.108, + "args": { + "External id": 992483,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956513.726, "dur": 0.780, + "args": { + "External id": 992484,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345941956524.539, "dur": 25.566, + "args": { + "External id": 992485,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956526.781, "dur": 0.837, + "args": { + "External id": 992486,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956529.561, "dur": 2.879, + "args": { + "External id": 992487,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956534.438, "dur": 0.395, + "args": { + "External id": 992488,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956536.166, "dur": 0.459, + "args": { + "External id": 992489,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956538.155, "dur": 0.449, + "args": { + "External id": 992490,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956540.353, "dur": 0.367, + "args": { + "External id": 992491,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956541.655, "dur": 0.512, + "args": { + "External id": 992492,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956543.793, "dur": 0.327, + "args": { + "External id": 992493,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941956545.663, "dur": 0.528, + "args": { + "External id": 992494,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941956561.940, "dur": 35.602, + "args": { + "External id": 992495,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345941956659.282, "dur": 132.843, + "args": { + "External id": 992496,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941956685.849, "dur": 102.267, + "args": { + "External id": 992497,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10480, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345941956699.401, "dur": 83.541, + "args": { + "External id": 992498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345941956811.065, "dur": 2.309, + "args": { + "External id": 992499,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10482, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941956903.454, "dur": 2201.586, + "args": { + "External id": 992500,"Sequence number": 10552487, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10483 + } + }, + { + "ph": "f", "id": 432, "pid": 2338708, "tid": 2379421, "ts": 6345941956903.454, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941957092.601, "dur": 127.545, + "args": { + "External id": 992501,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941957273.560, "dur": 50.553, + "args": { + "External id": 992502,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345941957347.856, "dur": 57.397, + "args": { + "External id": 992503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941957416.845, "dur": 36.910, + "args": { + "External id": 992504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941957461.280, "dur": 37.687, + "args": { + "External id": 992505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941957506.339, "dur": 32.372, + "args": { + "External id": 992506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941957549.728, "dur": 34.323, + "args": { + "External id": 992507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941957614.060, "dur": 30.586, + "args": { + "External id": 992508,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345941957667.249, "dur": 35.877, + "args": { + "External id": 992509,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941957728.137, "dur": 26.254, + "args": { + "External id": 992510,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941957772.177, "dur": 19.583, + "args": { + "External id": 992511,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941957808.623, "dur": 45.759, + "args": { + "External id": 992512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941957859.084, "dur": 40.464, + "args": { + "External id": 992513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345941957937.171, "dur": 404.054, + "args": { + "External id": 992514,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941958104.287, "dur": 13.557, + "args": { + "External id": 992515,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941958121.193, "dur": 3.525, + "args": { + "External id": 992516,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941958126.584, "dur": 2.757, + "args": { + "External id": 992517,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941958130.485, "dur": 4.496, + "args": { + "External id": 992518,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941958200.708, "dur": 6.178, + "args": { + "External id": 992519,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941958203.013, "dur": 3.636, + "args": { + "External id": 992520,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941958209.212, "dur": 42.037, + "args": { + "External id": 992521,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941958219.292, "dur": 2.057, + "args": { + "External id": 992522,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345941958252.985, "dur": 2.273, + "args": { + "External id": 992523,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941958254.337, "dur": 0.823, + "args": { + "External id": 992524,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345941958256.065, "dur": 23.056, + "args": { + "External id": 992525,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941958258.400, "dur": 0.787, + "args": { + "External id": 992526,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345941958387.834, "dur": 48.794, + "args": { + "External id": 992527,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941958457.939, "dur": 22.206, + "args": { + "External id": 992528,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941958490.654, "dur": 65.430, + "args": { + "External id": 992529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941958565.275, "dur": 50.222, + "args": { + "External id": 992530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941958626.191, "dur": 28.354, + "args": { + "External id": 992531,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941958664.709, "dur": 37.206, + "args": { + "External id": 992532,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941958711.021, "dur": 35.332, + "args": { + "External id": 992533,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345941958755.618, "dur": 37.445, + "args": { + "External id": 992534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345941958816.533, "dur": 32.035, + "args": { + "External id": 992535,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941958867.763, "dur": 31.533, + "args": { + "External id": 992536,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345941958916.786, "dur": 22.283, + "args": { + "External id": 992537,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345941958956.796, "dur": 17.291, + "args": { + "External id": 992538,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345941958987.453, "dur": 19.410, + "args": { + "External id": 992539,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959159.602, "dur": 19.297, + "args": { + "External id": 992540,"Record function id": 0, "Ev Idx": 10523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959163.825, "dur": 13.890, + "args": { + "External id": 992541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959168.975, "dur": 7.520, + "args": { + "External id": 992542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959171.355, "dur": 4.986, + "args": { + "External id": 992543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959183.671, "dur": 5.593, + "args": { + "External id": 992544,"Record function id": 0, "Ev Idx": 10527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959185.181, "dur": 3.567, + "args": { + "External id": 992545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959185.954, "dur": 2.138, + "args": { + "External id": 992546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959186.762, "dur": 1.232, + "args": { + "External id": 992547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959193.259, "dur": 5.202, + "args": { + "External id": 992548,"Record function id": 0, "Ev Idx": 10531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959194.690, "dur": 3.244, + "args": { + "External id": 992549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959195.505, "dur": 1.848, + "args": { + "External id": 992550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959196.081, "dur": 1.185, + "args": { + "External id": 992551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959202.378, "dur": 4.223, + "args": { + "External id": 992552,"Record function id": 0, "Ev Idx": 10535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959203.648, "dur": 2.439, + "args": { + "External id": 992553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959204.294, "dur": 1.327, + "args": { + "External id": 992554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959204.651, "dur": 0.889, + "args": { + "External id": 992555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959210.418, "dur": 4.663, + "args": { + "External id": 992556,"Record function id": 0, "Ev Idx": 10539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959211.833, "dur": 2.753, + "args": { + "External id": 992557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959212.483, "dur": 1.576, + "args": { + "External id": 992558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959212.816, "dur": 1.160, + "args": { + "External id": 992559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959219.061, "dur": 7.476, + "args": { + "External id": 992560,"Record function id": 0, "Ev Idx": 10543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959220.373, "dur": 5.644, + "args": { + "External id": 992561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959221.081, "dur": 4.393, + "args": { + "External id": 992562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959221.738, "dur": 3.606, + "args": { + "External id": 992563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959230.642, "dur": 4.617, + "args": { + "External id": 992564,"Record function id": 0, "Ev Idx": 10547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959231.968, "dur": 2.729, + "args": { + "External id": 992565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959232.641, "dur": 1.432, + "args": { + "External id": 992566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959233.043, "dur": 0.954, + "args": { + "External id": 992567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959239.019, "dur": 4.384, + "args": { + "External id": 992568,"Record function id": 0, "Ev Idx": 10551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959240.289, "dur": 2.593, + "args": { + "External id": 992569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959240.898, "dur": 1.344, + "args": { + "External id": 992570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959241.227, "dur": 0.911, + "args": { + "External id": 992571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959247.752, "dur": 4.716, + "args": { + "External id": 992572,"Record function id": 0, "Ev Idx": 10555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345941959249.153, "dur": 2.826, + "args": { + "External id": 992573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959249.779, "dur": 1.727, + "args": { + "External id": 992574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345941959250.516, "dur": 0.848, + "args": { + "External id": 992575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941959258.403, "dur": 69409.532, + "args": { + "External id": 992576,"Record function id": 0, "Sequence number": 10552486, "Fwd thread id": 1, "Ev Idx": 10559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345941959259.957, "dur": 69397.045, + "args": { + "External id": 992577,"Sequence number": 10552486, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10560 + } + }, + { + "ph": "f", "id": 433, "pid": 2338708, "tid": 2379421, "ts": 6345941959259.957, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2338708, "tid": 2379421, + "ts": 6345941959295.679, "dur": 48.211, + "args": { + "External id": 992578,"Record function id": 0, "Ev Idx": 10561 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2338708, "tid": 2379421, + "ts": 6345941959353.348, "dur": 80.636, + "args": { + "External id": 992579,"Record function id": 0, "Ev Idx": 10562 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2338708, "tid": 2379421, + "ts": 6345941959441.433, "dur": 69206.113, + "args": { + "External id": 992580,"Record function id": 0, "Ev Idx": 10563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941959552.881, "dur": 7.540, + "args": { + "External id": 992581,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345941959572.446, "dur": 5.670, + "args": { + "External id": 992582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941959594.521, "dur": 67939.659, + "args": { + "External id": 992583,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345941959614.365, "dur": 67905.319, + "args": { + "External id": 992584,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345941959733.609, "dur": 20.157, + "args": { + "External id": 992585,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345941959789.236, "dur": 67684.428, + "args": { + "External id": 992586,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345941959792.753, "dur": 67679.824, + "args": { + "External id": 992587,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345941959800.059, "dur": 12.704, + "args": { + "External id": 992588,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345941959815.677, "dur": 67653.767, + "args": { + "External id": 992589,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942027661.506, "dur": 12.311, + "args": { + "External id": 992590,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942027665.286, "dur": 8.090, + "args": { + "External id": 992591,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942027710.926, "dur": 475.073, + "args": { + "External id": 992592,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942027751.511, "dur": 427.777, + "args": { + "External id": 992593,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10576, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942027765.218, "dur": 406.752, + "args": { + "External id": 992594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942028214.062, "dur": 2.878, + "args": { + "External id": 992595,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10578, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942028294.620, "dur": 7.753, + "args": { + "External id": 992596,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942028316.726, "dur": 42.807, + "args": { + "External id": 992597,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942028372.254, "dur": 4.178, + "args": { + "External id": 992598,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942028383.128, "dur": 16.177, + "args": { + "External id": 992599,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942028405.873, "dur": 1.127, + "args": { + "External id": 992600,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942028412.236, "dur": 14.404, + "args": { + "External id": 992601,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942028432.763, "dur": 0.837, + "args": { + "External id": 992602,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942028437.853, "dur": 13.650, + "args": { + "External id": 992603,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942028456.501, "dur": 1.019, + "args": { + "External id": 992604,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942028462.203, "dur": 14.884, + "args": { + "External id": 992605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942028481.967, "dur": 1.042, + "args": { + "External id": 992606,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942028488.120, "dur": 12.516, + "args": { + "External id": 992607,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942028506.414, "dur": 1.200, + "args": { + "External id": 992608,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942028512.014, "dur": 14.420, + "args": { + "External id": 992609,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942028531.899, "dur": 0.933, + "args": { + "External id": 992610,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942028537.620, "dur": 12.478, + "args": { + "External id": 992611,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942028557.376, "dur": 0.898, + "args": { + "External id": 992612,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942028563.354, "dur": 14.001, + "args": { + "External id": 992613,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942028684.491, "dur": 3506.007, + "args": { + "External id": 992614,"Record function id": 0, "Ev Idx": 10597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2338708, "tid": 2379421, + "ts": 6345942028709.287, "dur": 1248.377, + "args": { + "External id": 992615,"Record function id": 0, "Ev Idx": 10598 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338708, "tid": 2379421, + "ts": 6345942028729.174, "dur": 443.954, + "args": { + "External id": 992616,"Record function id": 0, "Ev Idx": 10599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942028830.735, "dur": 7.204, + "args": { + "External id": 992617,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942028841.770, "dur": 1.143, + "args": { + "External id": 992618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942028845.431, "dur": 1.519, + "args": { + "External id": 992619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942028849.195, "dur": 0.838, + "args": { + "External id": 992620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942028852.086, "dur": 1.108, + "args": { + "External id": 992621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942028854.829, "dur": 1.009, + "args": { + "External id": 992622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942028857.701, "dur": 0.692, + "args": { + "External id": 992623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942028862.245, "dur": 2.309, + "args": { + "External id": 992624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942028866.214, "dur": 2.617, + "args": { + "External id": 992625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942028870.377, "dur": 0.660, + "args": { + "External id": 992626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942028891.751, "dur": 238.558, + "args": { + "External id": 992627,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942028909.661, "dur": 213.866, + "args": { + "External id": 992628,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942028926.998, "dur": 16.869, + "args": { + "External id": 992629,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942028950.018, "dur": 99.446, + "args": { + "External id": 992630,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942028953.224, "dur": 95.737, + "args": { + "External id": 992631,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942028958.313, "dur": 7.739, + "args": { + "External id": 992632,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942028968.445, "dur": 79.307, + "args": { + "External id": 992633,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2338708, "tid": 2379421, + "ts": 6345942029284.931, "dur": 663.711, + "args": { + "External id": 992634,"Record function id": 0, "Ev Idx": 10617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338708, "tid": 2379421, + "ts": 6345942029304.395, "dur": 628.770, + "args": { + "External id": 992635,"Record function id": 0, "Ev Idx": 10618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942029381.589, "dur": 7.525, + "args": { + "External id": 992636,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345942029407.836, "dur": 34.484, + "args": { + "External id": 992637,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029413.966, "dur": 2.054, + "args": { + "External id": 992638,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029419.344, "dur": 0.677, + "args": { + "External id": 992639,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029421.944, "dur": 0.675, + "args": { + "External id": 992640,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029423.727, "dur": 2.806, + "args": { + "External id": 992641,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029428.735, "dur": 0.515, + "args": { + "External id": 992642,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029431.384, "dur": 0.381, + "args": { + "External id": 992643,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029432.859, "dur": 0.553, + "args": { + "External id": 992644,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029435.187, "dur": 0.614, + "args": { + "External id": 992645,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029437.542, "dur": 0.452, + "args": { + "External id": 992646,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942029455.369, "dur": 46.961, + "args": { + "External id": 992647,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345942029538.106, "dur": 127.043, + "args": { + "External id": 992648,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942029549.770, "dur": 3.537, + "args": { + "External id": 992649,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345942029559.732, "dur": 12.201, + "args": { + "External id": 992650,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345942029564.976, "dur": 6.457, + "args": { + "External id": 992651,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029569.132, "dur": 0.978, + "args": { + "External id": 992652,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345942029580.052, "dur": 29.178, + "args": { + "External id": 992653,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029582.434, "dur": 3.121, + "args": { + "External id": 992654,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029587.165, "dur": 0.655, + "args": { + "External id": 992655,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029589.464, "dur": 0.583, + "args": { + "External id": 992656,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029591.979, "dur": 0.456, + "args": { + "External id": 992657,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029594.364, "dur": 0.407, + "args": { + "External id": 992658,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029595.817, "dur": 0.477, + "args": { + "External id": 992659,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029598.250, "dur": 0.611, + "args": { + "External id": 992660,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029600.797, "dur": 0.573, + "args": { + "External id": 992661,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942029602.329, "dur": 2.917, + "args": { + "External id": 992662,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942029621.197, "dur": 35.273, + "args": { + "External id": 992663,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942029713.493, "dur": 141.611, + "args": { + "External id": 992664,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942029752.241, "dur": 98.856, + "args": { + "External id": 992665,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10648, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942029763.034, "dur": 83.108, + "args": { + "External id": 992666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942029874.652, "dur": 2.327, + "args": { + "External id": 992667,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10650, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942029966.475, "dur": 2198.858, + "args": { + "External id": 992668,"Sequence number": 10552485, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10651 + } + }, + { + "ph": "f", "id": 434, "pid": 2338708, "tid": 2379421, "ts": 6345942029966.475, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942030171.029, "dur": 127.923, + "args": { + "External id": 992669,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942030353.673, "dur": 50.411, + "args": { + "External id": 992670,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345942030427.109, "dur": 58.191, + "args": { + "External id": 992671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942030498.036, "dur": 37.440, + "args": { + "External id": 992672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942030543.149, "dur": 37.871, + "args": { + "External id": 992673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942030588.766, "dur": 32.252, + "args": { + "External id": 992674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942030631.201, "dur": 34.446, + "args": { + "External id": 992675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942030696.016, "dur": 28.544, + "args": { + "External id": 992676,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942030748.616, "dur": 34.564, + "args": { + "External id": 992677,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942030810.852, "dur": 25.875, + "args": { + "External id": 992678,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942030854.717, "dur": 19.313, + "args": { + "External id": 992679,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942030886.672, "dur": 46.295, + "args": { + "External id": 992680,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942030938.376, "dur": 39.808, + "args": { + "External id": 992681,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345942031034.721, "dur": 384.520, + "args": { + "External id": 992682,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942031199.962, "dur": 10.983, + "args": { + "External id": 992683,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942031214.442, "dur": 3.167, + "args": { + "External id": 992684,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942031219.381, "dur": 6.194, + "args": { + "External id": 992685,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942031227.094, "dur": 4.728, + "args": { + "External id": 992686,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942031296.159, "dur": 6.689, + "args": { + "External id": 992687,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942031298.877, "dur": 3.731, + "args": { + "External id": 992688,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942031305.023, "dur": 38.876, + "args": { + "External id": 992689,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942031311.704, "dur": 2.057, + "args": { + "External id": 992690,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942031345.917, "dur": 2.238, + "args": { + "External id": 992691,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942031347.177, "dur": 0.881, + "args": { + "External id": 992692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942031349.516, "dur": 18.938, + "args": { + "External id": 992693,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942031351.728, "dur": 0.767, + "args": { + "External id": 992694,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942031466.029, "dur": 34.027, + "args": { + "External id": 992695,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942031524.230, "dur": 21.073, + "args": { + "External id": 992696,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942031556.568, "dur": 62.180, + "args": { + "External id": 992697,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942031627.428, "dur": 49.104, + "args": { + "External id": 992698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942031686.946, "dur": 26.860, + "args": { + "External id": 992699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942031723.311, "dur": 39.521, + "args": { + "External id": 992700,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942031771.810, "dur": 33.337, + "args": { + "External id": 992701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942031814.323, "dur": 35.707, + "args": { + "External id": 992702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345942031871.700, "dur": 28.244, + "args": { + "External id": 992703,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942031918.209, "dur": 30.159, + "args": { + "External id": 992704,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942031965.564, "dur": 20.527, + "args": { + "External id": 992705,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942032001.759, "dur": 37.312, + "args": { + "External id": 992706,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345942032099.353, "dur": 24.035, + "args": { + "External id": 992707,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032217.424, "dur": 18.047, + "args": { + "External id": 992708,"Record function id": 0, "Ev Idx": 10691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032221.454, "dur": 12.926, + "args": { + "External id": 992709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032226.425, "dur": 6.908, + "args": { + "External id": 992710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032228.522, "dur": 4.665, + "args": { + "External id": 992711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032240.344, "dur": 5.812, + "args": { + "External id": 992712,"Record function id": 0, "Ev Idx": 10695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032241.680, "dur": 3.857, + "args": { + "External id": 992713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032242.612, "dur": 2.338, + "args": { + "External id": 992714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032243.478, "dur": 1.359, + "args": { + "External id": 992715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032250.196, "dur": 5.452, + "args": { + "External id": 992716,"Record function id": 0, "Ev Idx": 10699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032251.751, "dur": 3.368, + "args": { + "External id": 992717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032252.653, "dur": 1.852, + "args": { + "External id": 992718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032253.085, "dur": 1.340, + "args": { + "External id": 992719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032259.508, "dur": 5.074, + "args": { + "External id": 992720,"Record function id": 0, "Ev Idx": 10703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032260.830, "dur": 3.220, + "args": { + "External id": 992721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032261.658, "dur": 1.760, + "args": { + "External id": 992722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032262.556, "dur": 0.782, + "args": { + "External id": 992723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032268.408, "dur": 5.530, + "args": { + "External id": 992724,"Record function id": 0, "Ev Idx": 10707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032269.725, "dur": 3.700, + "args": { + "External id": 992725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032270.428, "dur": 2.297, + "args": { + "External id": 992726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032271.170, "dur": 1.475, + "args": { + "External id": 992727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032278.042, "dur": 5.016, + "args": { + "External id": 992728,"Record function id": 0, "Ev Idx": 10711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032279.764, "dur": 2.735, + "args": { + "External id": 992729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032280.442, "dur": 1.476, + "args": { + "External id": 992730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032280.980, "dur": 0.815, + "args": { + "External id": 992731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032287.088, "dur": 6.758, + "args": { + "External id": 992732,"Record function id": 0, "Ev Idx": 10715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032288.259, "dur": 5.073, + "args": { + "External id": 992733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032289.147, "dur": 3.695, + "args": { + "External id": 992734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032289.966, "dur": 2.803, + "args": { + "External id": 992735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032297.871, "dur": 4.828, + "args": { + "External id": 992736,"Record function id": 0, "Ev Idx": 10719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032299.204, "dur": 2.950, + "args": { + "External id": 992737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032299.918, "dur": 1.423, + "args": { + "External id": 992738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032300.394, "dur": 0.839, + "args": { + "External id": 992739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032307.288, "dur": 4.279, + "args": { + "External id": 992740,"Record function id": 0, "Ev Idx": 10723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942032308.492, "dur": 2.588, + "args": { + "External id": 992741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032309.030, "dur": 1.402, + "args": { + "External id": 992742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942032309.542, "dur": 0.756, + "args": { + "External id": 992743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942032317.114, "dur": 70606.062, + "args": { + "External id": 992744,"Record function id": 0, "Sequence number": 10552484, "Fwd thread id": 1, "Ev Idx": 10727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942032318.844, "dur": 70593.584, + "args": { + "External id": 992745,"Sequence number": 10552484, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10728 + } + }, + { + "ph": "f", "id": 435, "pid": 2338708, "tid": 2379421, "ts": 6345942032318.844, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2338708, "tid": 2379421, + "ts": 6345942032355.299, "dur": 45.299, + "args": { + "External id": 992746,"Record function id": 0, "Ev Idx": 10729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2338708, "tid": 2379421, + "ts": 6345942032409.648, "dur": 81.735, + "args": { + "External id": 992747,"Record function id": 0, "Ev Idx": 10730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2338708, "tid": 2379421, + "ts": 6345942032498.372, "dur": 70404.981, + "args": { + "External id": 992748,"Record function id": 0, "Ev Idx": 10731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942032605.004, "dur": 8.474, + "args": { + "External id": 992749,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942032624.762, "dur": 5.857, + "args": { + "External id": 992750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942032648.071, "dur": 69069.596, + "args": { + "External id": 992751,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942032664.011, "dur": 69038.677, + "args": { + "External id": 992752,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942032795.741, "dur": 21.788, + "args": { + "External id": 992753,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942032842.690, "dur": 68811.687, + "args": { + "External id": 992754,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942032849.417, "dur": 68803.797, + "args": { + "External id": 992755,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942032855.605, "dur": 11.249, + "args": { + "External id": 992756,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942032869.304, "dur": 68778.389, + "args": { + "External id": 992757,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942101842.277, "dur": 14.202, + "args": { + "External id": 992758,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942101846.575, "dur": 9.460, + "args": { + "External id": 992759,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942101892.636, "dur": 518.415, + "args": { + "External id": 992760,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942101930.791, "dur": 473.274, + "args": { + "External id": 992761,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10744, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942101943.696, "dur": 451.680, + "args": { + "External id": 992762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942102439.391, "dur": 2.892, + "args": { + "External id": 992763,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10746, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942102519.840, "dur": 8.165, + "args": { + "External id": 992764,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942102542.614, "dur": 42.206, + "args": { + "External id": 992765,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942102596.965, "dur": 4.295, + "args": { + "External id": 992766,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942102608.043, "dur": 15.095, + "args": { + "External id": 992767,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942102630.264, "dur": 0.950, + "args": { + "External id": 992768,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942102665.489, "dur": 16.192, + "args": { + "External id": 992769,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942102694.843, "dur": 1.100, + "args": { + "External id": 992770,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942102701.548, "dur": 13.352, + "args": { + "External id": 992771,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942102719.960, "dur": 0.935, + "args": { + "External id": 992772,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942102725.799, "dur": 13.237, + "args": { + "External id": 992773,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942102743.664, "dur": 1.108, + "args": { + "External id": 992774,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942102749.901, "dur": 11.640, + "args": { + "External id": 992775,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942102768.650, "dur": 0.937, + "args": { + "External id": 992776,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942102774.441, "dur": 11.818, + "args": { + "External id": 992777,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942102791.076, "dur": 0.936, + "args": { + "External id": 992778,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942102797.290, "dur": 11.494, + "args": { + "External id": 992779,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942102813.856, "dur": 1.027, + "args": { + "External id": 992780,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942102819.961, "dur": 11.820, + "args": { + "External id": 992781,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942102940.501, "dur": 3508.652, + "args": { + "External id": 992782,"Record function id": 0, "Ev Idx": 10765 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2338708, "tid": 2379421, + "ts": 6345942102966.025, "dur": 1325.483, + "args": { + "External id": 992783,"Record function id": 0, "Ev Idx": 10766 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338708, "tid": 2379421, + "ts": 6345942102985.593, "dur": 459.825, + "args": { + "External id": 992784,"Record function id": 0, "Ev Idx": 10767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942103158.621, "dur": 7.942, + "args": { + "External id": 992785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942103170.870, "dur": 0.965, + "args": { + "External id": 992786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942103174.144, "dur": 1.043, + "args": { + "External id": 992787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942103177.151, "dur": 0.997, + "args": { + "External id": 992788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942103180.602, "dur": 1.142, + "args": { + "External id": 992789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942103183.631, "dur": 0.738, + "args": { + "External id": 992790,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942103186.256, "dur": 0.753, + "args": { + "External id": 992791,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942103189.104, "dur": 2.232, + "args": { + "External id": 992792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942103195.508, "dur": 3.181, + "args": { + "External id": 992793,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942103200.409, "dur": 0.719, + "args": { + "External id": 992794,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942103224.204, "dur": 182.091, + "args": { + "External id": 992795,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942103243.750, "dur": 156.820, + "args": { + "External id": 992796,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942103261.758, "dur": 18.005, + "args": { + "External id": 992797,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942103283.856, "dur": 86.120, + "args": { + "External id": 992798,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942103288.851, "dur": 80.779, + "args": { + "External id": 992799,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103293.896, "dur": 7.630, + "args": { + "External id": 992800,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942103305.594, "dur": 63.509, + "args": { + "External id": 992801,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10784 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2338708, "tid": 2379421, + "ts": 6345942103552.237, "dur": 730.080, + "args": { + "External id": 992802,"Record function id": 0, "Ev Idx": 10785 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338708, "tid": 2379421, + "ts": 6345942103572.077, "dur": 694.116, + "args": { + "External id": 992803,"Record function id": 0, "Ev Idx": 10786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942103646.037, "dur": 7.073, + "args": { + "External id": 992804,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345942103671.112, "dur": 32.384, + "args": { + "External id": 992805,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103677.138, "dur": 1.735, + "args": { + "External id": 992806,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103680.845, "dur": 0.972, + "args": { + "External id": 992807,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103683.983, "dur": 0.534, + "args": { + "External id": 992808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103686.608, "dur": 2.650, + "args": { + "External id": 992809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103690.548, "dur": 0.520, + "args": { + "External id": 992810,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103692.932, "dur": 0.666, + "args": { + "External id": 992811,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103695.263, "dur": 0.408, + "args": { + "External id": 992812,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103696.862, "dur": 0.488, + "args": { + "External id": 992813,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103699.009, "dur": 0.551, + "args": { + "External id": 992814,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942103716.961, "dur": 49.177, + "args": { + "External id": 992815,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345942103802.380, "dur": 122.007, + "args": { + "External id": 992816,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942103813.936, "dur": 3.269, + "args": { + "External id": 992817,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345942103822.879, "dur": 12.592, + "args": { + "External id": 992818,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345942103827.631, "dur": 7.320, + "args": { + "External id": 992819,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103832.371, "dur": 0.982, + "args": { + "External id": 992820,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345942103843.876, "dur": 27.318, + "args": { + "External id": 992821,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103845.962, "dur": 2.749, + "args": { + "External id": 992822,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103850.427, "dur": 0.690, + "args": { + "External id": 992823,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103852.742, "dur": 0.523, + "args": { + "External id": 992824,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103854.996, "dur": 0.475, + "args": { + "External id": 992825,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103856.581, "dur": 0.666, + "args": { + "External id": 992826,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103859.017, "dur": 0.507, + "args": { + "External id": 992827,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103861.070, "dur": 0.697, + "args": { + "External id": 992828,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103862.964, "dur": 0.471, + "args": { + "External id": 992829,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942103864.884, "dur": 2.505, + "args": { + "External id": 992830,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942103882.971, "dur": 32.460, + "args": { + "External id": 992831,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942103974.770, "dur": 198.176, + "args": { + "External id": 992832,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942104003.951, "dur": 164.271, + "args": { + "External id": 992833,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10816, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942104034.446, "dur": 128.132, + "args": { + "External id": 992834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942104196.418, "dur": 2.637, + "args": { + "External id": 992835,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10818, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942104300.862, "dur": 2125.202, + "args": { + "External id": 992836,"Sequence number": 10552483, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10819 + } + }, + { + "ph": "f", "id": 436, "pid": 2338708, "tid": 2379421, "ts": 6345942104300.862, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942104435.723, "dur": 121.652, + "args": { + "External id": 992837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942104605.815, "dur": 48.014, + "args": { + "External id": 992838,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345942104676.465, "dur": 58.003, + "args": { + "External id": 992839,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942104746.136, "dur": 38.714, + "args": { + "External id": 992840,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942104791.993, "dur": 37.389, + "args": { + "External id": 992841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942104836.744, "dur": 32.995, + "args": { + "External id": 992842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942104884.693, "dur": 34.262, + "args": { + "External id": 992843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942104951.059, "dur": 28.487, + "args": { + "External id": 992844,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942105003.071, "dur": 98.269, + "args": { + "External id": 992845,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942105134.968, "dur": 27.120, + "args": { + "External id": 992846,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942105180.565, "dur": 19.436, + "args": { + "External id": 992847,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942105213.871, "dur": 53.699, + "args": { + "External id": 992848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942105272.316, "dur": 55.835, + "args": { + "External id": 992849,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345942105372.657, "dur": 308.479, + "args": { + "External id": 992850,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 10833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942105477.846, "dur": 7.490, + "args": { + "External id": 992851,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942105487.876, "dur": 3.671, + "args": { + "External id": 992852,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942105493.044, "dur": 2.553, + "args": { + "External id": 992853,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942105497.249, "dur": 4.570, + "args": { + "External id": 992854,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942105558.789, "dur": 6.338, + "args": { + "External id": 992855,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942105561.113, "dur": 3.741, + "args": { + "External id": 992856,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942105567.123, "dur": 39.218, + "args": { + "External id": 992857,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942105573.813, "dur": 2.184, + "args": { + "External id": 992858,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942105608.372, "dur": 2.087, + "args": { + "External id": 992859,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942105609.603, "dur": 0.760, + "args": { + "External id": 992860,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 10843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942105611.823, "dur": 19.313, + "args": { + "External id": 992861,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 10844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942105614.191, "dur": 0.745, + "args": { + "External id": 992862,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 10845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942105721.090, "dur": 35.313, + "args": { + "External id": 992863,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942105794.834, "dur": 21.686, + "args": { + "External id": 992864,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942105826.511, "dur": 50.396, + "args": { + "External id": 992865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942105885.329, "dur": 46.968, + "args": { + "External id": 992866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942105942.081, "dur": 24.869, + "args": { + "External id": 992867,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 10850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942105976.065, "dur": 57.695, + "args": { + "External id": 992868,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 10851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942106047.924, "dur": 78.446, + "args": { + "External id": 992869,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942106140.264, "dur": 36.999, + "args": { + "External id": 992870,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 10853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345942106202.811, "dur": 30.907, + "args": { + "External id": 992871,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 10854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942106253.068, "dur": 28.542, + "args": { + "External id": 992872,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942106299.115, "dur": 19.496, + "args": { + "External id": 992873,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942106335.785, "dur": 17.184, + "args": { + "External id": 992874,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345942106367.398, "dur": 19.437, + "args": { + "External id": 992875,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 10858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106476.032, "dur": 19.201, + "args": { + "External id": 992876,"Record function id": 0, "Ev Idx": 10859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106480.288, "dur": 13.774, + "args": { + "External id": 992877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106485.458, "dur": 7.394, + "args": { + "External id": 992878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106488.018, "dur": 4.680, + "args": { + "External id": 992879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106499.986, "dur": 6.532, + "args": { + "External id": 992880,"Record function id": 0, "Ev Idx": 10863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106501.295, "dur": 4.508, + "args": { + "External id": 992881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106502.378, "dur": 2.645, + "args": { + "External id": 992882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106503.602, "dur": 1.317, + "args": { + "External id": 992883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106510.945, "dur": 5.603, + "args": { + "External id": 992884,"Record function id": 0, "Ev Idx": 10867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106512.593, "dur": 3.439, + "args": { + "External id": 992885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106513.264, "dur": 2.064, + "args": { + "External id": 992886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106514.191, "dur": 1.056, + "args": { + "External id": 992887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 10870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106520.525, "dur": 5.759, + "args": { + "External id": 992888,"Record function id": 0, "Ev Idx": 10871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106521.828, "dur": 3.977, + "args": { + "External id": 992889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106523.560, "dur": 1.550, + "args": { + "External id": 992890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106524.186, "dur": 0.840, + "args": { + "External id": 992891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 10874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106530.125, "dur": 5.717, + "args": { + "External id": 992892,"Record function id": 0, "Ev Idx": 10875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106532.112, "dur": 3.227, + "args": { + "External id": 992893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106532.809, "dur": 1.617, + "args": { + "External id": 992894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106533.360, "dur": 0.989, + "args": { + "External id": 992895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106539.713, "dur": 7.706, + "args": { + "External id": 992896,"Record function id": 0, "Ev Idx": 10879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106541.096, "dur": 5.803, + "args": { + "External id": 992897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106541.733, "dur": 4.526, + "args": { + "External id": 992898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106542.485, "dur": 3.659, + "args": { + "External id": 992899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 10882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106551.476, "dur": 4.989, + "args": { + "External id": 992900,"Record function id": 0, "Ev Idx": 10883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106553.280, "dur": 2.650, + "args": { + "External id": 992901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106553.907, "dur": 1.359, + "args": { + "External id": 992902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106554.258, "dur": 0.922, + "args": { + "External id": 992903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106560.289, "dur": 4.833, + "args": { + "External id": 992904,"Record function id": 0, "Ev Idx": 10887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106561.748, "dur": 2.838, + "args": { + "External id": 992905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106562.561, "dur": 1.519, + "args": { + "External id": 992906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106563.079, "dur": 0.913, + "args": { + "External id": 992907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 10890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106568.933, "dur": 4.818, + "args": { + "External id": 992908,"Record function id": 0, "Ev Idx": 10891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942106570.190, "dur": 3.020, + "args": { + "External id": 992909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106570.781, "dur": 1.818, + "args": { + "External id": 992910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942106571.612, "dur": 0.849, + "args": { + "External id": 992911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 10894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942106579.054, "dur": 63749.121, + "args": { + "External id": 992912,"Record function id": 0, "Sequence number": 10552482, "Fwd thread id": 1, "Ev Idx": 10895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942106580.840, "dur": 63735.974, + "args": { + "External id": 992913,"Sequence number": 10552482, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10896 + } + }, + { + "ph": "f", "id": 437, "pid": 2338708, "tid": 2379421, "ts": 6345942106580.840, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2338708, "tid": 2379421, + "ts": 6345942106616.037, "dur": 48.447, + "args": { + "External id": 992914,"Record function id": 0, "Ev Idx": 10897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2338708, "tid": 2379421, + "ts": 6345942106673.771, "dur": 81.353, + "args": { + "External id": 992915,"Record function id": 0, "Ev Idx": 10898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2338708, "tid": 2379421, + "ts": 6345942106763.060, "dur": 63544.314, + "args": { + "External id": 992916,"Record function id": 0, "Ev Idx": 10899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942106870.636, "dur": 8.147, + "args": { + "External id": 992917,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942106890.225, "dur": 5.373, + "args": { + "External id": 992918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942106912.773, "dur": 62298.804, + "args": { + "External id": 992919,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942106929.272, "dur": 62266.778, + "args": { + "External id": 992920,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 10903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942107115.154, "dur": 31.610, + "args": { + "External id": 992921,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942107177.490, "dur": 61973.969, + "args": { + "External id": 992922,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 10905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942107181.244, "dur": 61969.402, + "args": { + "External id": 992923,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 10906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942107191.158, "dur": 13.460, + "args": { + "External id": 992924,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942107207.434, "dur": 61940.235, + "args": { + "External id": 992925,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 10908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942169337.146, "dur": 12.401, + "args": { + "External id": 992926,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 10909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942169341.007, "dur": 8.064, + "args": { + "External id": 992927,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942169384.631, "dur": 418.442, + "args": { + "External id": 992928,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 10911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942169422.762, "dur": 374.441, + "args": { + "External id": 992929,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10912, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942169436.984, "dur": 354.027, + "args": { + "External id": 992930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 10913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942169825.770, "dur": 2.766, + "args": { + "External id": 992931,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10914, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942169897.651, "dur": 7.763, + "args": { + "External id": 992932,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942169919.468, "dur": 38.072, + "args": { + "External id": 992933,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942169969.766, "dur": 4.397, + "args": { + "External id": 992934,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942169980.522, "dur": 14.183, + "args": { + "External id": 992935,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942170001.595, "dur": 1.182, + "args": { + "External id": 992936,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942170026.750, "dur": 16.883, + "args": { + "External id": 992937,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942170087.067, "dur": 2.541, + "args": { + "External id": 992938,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942170097.570, "dur": 16.303, + "args": { + "External id": 992939,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 10922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942170120.756, "dur": 1.205, + "args": { + "External id": 992940,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942170127.135, "dur": 12.646, + "args": { + "External id": 992941,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 10924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942170144.476, "dur": 1.316, + "args": { + "External id": 992942,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942170150.414, "dur": 11.920, + "args": { + "External id": 992943,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 10926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942170167.787, "dur": 1.167, + "args": { + "External id": 992944,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942170175.783, "dur": 13.299, + "args": { + "External id": 992945,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942170194.315, "dur": 1.212, + "args": { + "External id": 992946,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942170200.910, "dur": 11.896, + "args": { + "External id": 992947,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 10930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942170217.149, "dur": 1.082, + "args": { + "External id": 992948,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942170222.997, "dur": 12.098, + "args": { + "External id": 992949,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 10932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942170346.877, "dur": 3403.272, + "args": { + "External id": 992950,"Record function id": 0, "Ev Idx": 10933 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2338708, "tid": 2379421, + "ts": 6345942170371.719, "dur": 1253.289, + "args": { + "External id": 992951,"Record function id": 0, "Ev Idx": 10934 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338708, "tid": 2379421, + "ts": 6345942170388.750, "dur": 378.914, + "args": { + "External id": 992952,"Record function id": 0, "Ev Idx": 10935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942170493.515, "dur": 7.671, + "args": { + "External id": 992953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 10936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942170505.269, "dur": 1.456, + "args": { + "External id": 992954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942170508.762, "dur": 1.306, + "args": { + "External id": 992955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942170512.347, "dur": 1.156, + "args": { + "External id": 992956,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942170515.368, "dur": 1.120, + "args": { + "External id": 992957,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 10940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942170520.705, "dur": 0.929, + "args": { + "External id": 992958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 10941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942170523.437, "dur": 1.345, + "args": { + "External id": 992959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 10942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942170526.285, "dur": 2.216, + "args": { + "External id": 992960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942170530.411, "dur": 3.310, + "args": { + "External id": 992961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942170537.648, "dur": 0.738, + "args": { + "External id": 992962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 10945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942170559.857, "dur": 170.433, + "args": { + "External id": 992963,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942170579.096, "dur": 146.056, + "args": { + "External id": 992964,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 10947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942170597.321, "dur": 17.773, + "args": { + "External id": 992965,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942170619.727, "dur": 75.835, + "args": { + "External id": 992966,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 10949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942170622.774, "dur": 72.351, + "args": { + "External id": 992967,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 10950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942170627.481, "dur": 6.166, + "args": { + "External id": 992968,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942170635.806, "dur": 58.461, + "args": { + "External id": 992969,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 10952 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2338708, "tid": 2379421, + "ts": 6345942170873.986, "dur": 742.317, + "args": { + "External id": 992970,"Record function id": 0, "Ev Idx": 10953 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338708, "tid": 2379421, + "ts": 6345942170893.342, "dur": 707.166, + "args": { + "External id": 992971,"Record function id": 0, "Ev Idx": 10954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942170965.871, "dur": 6.090, + "args": { + "External id": 992972,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345942170990.909, "dur": 53.430, + "args": { + "External id": 992973,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942170997.130, "dur": 1.399, + "args": { + "External id": 992974,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171000.653, "dur": 0.565, + "args": { + "External id": 992975,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171003.129, "dur": 0.694, + "args": { + "External id": 992976,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171005.991, "dur": 21.690, + "args": { + "External id": 992977,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171030.629, "dur": 0.704, + "args": { + "External id": 992978,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171033.206, "dur": 0.526, + "args": { + "External id": 992979,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171035.635, "dur": 0.492, + "args": { + "External id": 992980,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171037.110, "dur": 0.393, + "args": { + "External id": 992981,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171039.515, "dur": 0.535, + "args": { + "External id": 992982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942171096.457, "dur": 52.258, + "args": { + "External id": 992983,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345942171189.619, "dur": 136.678, + "args": { + "External id": 992984,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 10967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942171202.744, "dur": 5.308, + "args": { + "External id": 992985,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345942171214.622, "dur": 13.549, + "args": { + "External id": 992986,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345942171220.003, "dur": 7.684, + "args": { + "External id": 992987,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 10970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171224.751, "dur": 1.137, + "args": { + "External id": 992988,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 10971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345942171236.674, "dur": 34.357, + "args": { + "External id": 992989,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 10972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171239.194, "dur": 2.982, + "args": { + "External id": 992990,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171243.796, "dur": 0.821, + "args": { + "External id": 992991,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171246.497, "dur": 0.560, + "args": { + "External id": 992992,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171249.035, "dur": 0.290, + "args": { + "External id": 992993,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171250.768, "dur": 0.422, + "args": { + "External id": 992994,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171252.937, "dur": 0.425, + "args": { + "External id": 992995,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171259.645, "dur": 0.499, + "args": { + "External id": 992996,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171262.116, "dur": 0.448, + "args": { + "External id": 992997,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942171264.303, "dur": 2.637, + "args": { + "External id": 992998,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 10981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942171283.817, "dur": 32.972, + "args": { + "External id": 992999,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 10982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942171383.720, "dur": 134.375, + "args": { + "External id": 993000,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 10983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942171415.090, "dur": 99.085, + "args": { + "External id": 993001,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 10984, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942171426.482, "dur": 82.910, + "args": { + "External id": 993002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 10985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942171537.301, "dur": 2.291, + "args": { + "External id": 993003,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 10986, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942171633.915, "dur": 2091.976, + "args": { + "External id": 993004,"Sequence number": 10552481, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 10987 + } + }, + { + "ph": "f", "id": 438, "pid": 2338708, "tid": 2379421, "ts": 6345942171633.915, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942171763.919, "dur": 122.546, + "args": { + "External id": 993005,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 10988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942171933.115, "dur": 48.750, + "args": { + "External id": 993006,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 10989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345942172005.215, "dur": 128.134, + "args": { + "External id": 993007,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 10990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942172154.665, "dur": 41.479, + "args": { + "External id": 993008,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942172203.650, "dur": 38.204, + "args": { + "External id": 993009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942172249.220, "dur": 31.235, + "args": { + "External id": 993010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 10993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942172290.972, "dur": 33.704, + "args": { + "External id": 993011,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 10994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942172362.190, "dur": 29.730, + "args": { + "External id": 993012,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 10995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942172414.898, "dur": 33.188, + "args": { + "External id": 993013,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 10996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942172475.261, "dur": 23.432, + "args": { + "External id": 993014,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 10997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942172515.151, "dur": 18.256, + "args": { + "External id": 993015,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 10998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942172546.064, "dur": 43.633, + "args": { + "External id": 993016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 10999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942172594.437, "dur": 39.110, + "args": { + "External id": 993017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345942172666.970, "dur": 326.303, + "args": { + "External id": 993018,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942172764.491, "dur": 8.077, + "args": { + "External id": 993019,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942172775.283, "dur": 3.773, + "args": { + "External id": 993020,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942172780.648, "dur": 2.811, + "args": { + "External id": 993021,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942172784.837, "dur": 5.200, + "args": { + "External id": 993022,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942172853.700, "dur": 6.671, + "args": { + "External id": 993023,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942172855.732, "dur": 3.962, + "args": { + "External id": 993024,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942172863.084, "dur": 39.387, + "args": { + "External id": 993025,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942172870.444, "dur": 2.089, + "args": { + "External id": 993026,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942172904.456, "dur": 2.622, + "args": { + "External id": 993027,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942172905.902, "dur": 1.069, + "args": { + "External id": 993028,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942172908.148, "dur": 22.427, + "args": { + "External id": 993029,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942172911.058, "dur": 0.578, + "args": { + "External id": 993030,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942173101.416, "dur": 38.662, + "args": { + "External id": 993031,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942173162.874, "dur": 21.200, + "args": { + "External id": 993032,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942173195.989, "dur": 62.457, + "args": { + "External id": 993033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942173267.284, "dur": 48.362, + "args": { + "External id": 993034,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942173326.023, "dur": 25.568, + "args": { + "External id": 993035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942173361.843, "dur": 36.208, + "args": { + "External id": 993036,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942173407.262, "dur": 33.169, + "args": { + "External id": 993037,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942173450.010, "dur": 35.793, + "args": { + "External id": 993038,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345942173506.499, "dur": 28.521, + "args": { + "External id": 993039,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942173554.705, "dur": 28.859, + "args": { + "External id": 993040,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942173601.297, "dur": 19.939, + "args": { + "External id": 993041,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942173637.762, "dur": 16.816, + "args": { + "External id": 993042,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345942173669.023, "dur": 17.955, + "args": { + "External id": 993043,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173776.571, "dur": 17.455, + "args": { + "External id": 993044,"Record function id": 0, "Ev Idx": 11027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173780.211, "dur": 12.794, + "args": { + "External id": 993045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173785.096, "dur": 6.889, + "args": { + "External id": 993046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173787.254, "dur": 4.594, + "args": { + "External id": 993047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173798.831, "dur": 6.142, + "args": { + "External id": 993048,"Record function id": 0, "Ev Idx": 11031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173800.382, "dur": 4.029, + "args": { + "External id": 993049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173801.237, "dur": 2.485, + "args": { + "External id": 993050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173802.353, "dur": 1.269, + "args": { + "External id": 993051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173808.954, "dur": 4.639, + "args": { + "External id": 993052,"Record function id": 0, "Ev Idx": 11035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173810.226, "dur": 2.859, + "args": { + "External id": 993053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173810.828, "dur": 1.697, + "args": { + "External id": 993054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173811.410, "dur": 1.008, + "args": { + "External id": 993055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173817.438, "dur": 4.488, + "args": { + "External id": 993056,"Record function id": 0, "Ev Idx": 11039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173818.899, "dur": 2.513, + "args": { + "External id": 993057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173819.514, "dur": 1.371, + "args": { + "External id": 993058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173820.021, "dur": 0.764, + "args": { + "External id": 993059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173825.787, "dur": 4.650, + "args": { + "External id": 993060,"Record function id": 0, "Ev Idx": 11043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173827.151, "dur": 2.772, + "args": { + "External id": 993061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173828.081, "dur": 1.220, + "args": { + "External id": 993062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173828.499, "dur": 0.720, + "args": { + "External id": 993063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173834.372, "dur": 4.899, + "args": { + "External id": 993064,"Record function id": 0, "Ev Idx": 11047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173835.799, "dur": 2.932, + "args": { + "External id": 993065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173836.523, "dur": 1.647, + "args": { + "External id": 993066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173837.116, "dur": 0.914, + "args": { + "External id": 993067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173843.254, "dur": 6.772, + "args": { + "External id": 993068,"Record function id": 0, "Ev Idx": 11051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173844.405, "dur": 5.123, + "args": { + "External id": 993069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173845.169, "dur": 3.570, + "args": { + "External id": 993070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173845.634, "dur": 3.024, + "args": { + "External id": 993071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173853.797, "dur": 4.511, + "args": { + "External id": 993072,"Record function id": 0, "Ev Idx": 11055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173855.081, "dur": 2.715, + "args": { + "External id": 993073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173855.671, "dur": 1.587, + "args": { + "External id": 993074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173856.225, "dur": 0.923, + "args": { + "External id": 993075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173862.525, "dur": 5.004, + "args": { + "External id": 993076,"Record function id": 0, "Ev Idx": 11059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942173863.806, "dur": 3.206, + "args": { + "External id": 993077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173864.553, "dur": 1.580, + "args": { + "External id": 993078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942173865.092, "dur": 0.893, + "args": { + "External id": 993079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942173872.969, "dur": 62588.334, + "args": { + "External id": 993080,"Record function id": 0, "Sequence number": 10552480, "Fwd thread id": 1, "Ev Idx": 11063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942173874.806, "dur": 62575.807, + "args": { + "External id": 993081,"Sequence number": 10552480, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11064 + } + }, + { + "ph": "f", "id": 439, "pid": 2338708, "tid": 2379421, "ts": 6345942173874.806, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2338708, "tid": 2379421, + "ts": 6345942173909.825, "dur": 44.733, + "args": { + "External id": 993082,"Record function id": 0, "Ev Idx": 11065 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2338708, "tid": 2379421, + "ts": 6345942173964.108, "dur": 151.482, + "args": { + "External id": 993083,"Record function id": 0, "Ev Idx": 11066 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2338708, "tid": 2379421, + "ts": 6345942174126.420, "dur": 62315.079, + "args": { + "External id": 993084,"Record function id": 0, "Ev Idx": 11067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942174235.872, "dur": 8.658, + "args": { + "External id": 993085,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942174257.921, "dur": 6.132, + "args": { + "External id": 993086,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942174280.905, "dur": 61080.896, + "args": { + "External id": 993087,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942174297.932, "dur": 61049.032, + "args": { + "External id": 993088,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942174429.418, "dur": 20.927, + "args": { + "External id": 993089,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942174473.353, "dur": 60826.734, + "args": { + "External id": 993090,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942174476.628, "dur": 60822.366, + "args": { + "External id": 993091,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942174494.940, "dur": 13.750, + "args": { + "External id": 993092,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942174511.577, "dur": 60784.074, + "args": { + "External id": 993093,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942235487.943, "dur": 12.945, + "args": { + "External id": 993094,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942235491.696, "dur": 8.598, + "args": { + "External id": 993095,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942235536.153, "dur": 406.990, + "args": { + "External id": 993096,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942235573.105, "dur": 364.187, + "args": { + "External id": 993097,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11080, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942235586.871, "dur": 344.011, + "args": { + "External id": 993098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942235965.375, "dur": 2.887, + "args": { + "External id": 993099,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11082, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942236093.564, "dur": 8.199, + "args": { + "External id": 993100,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942236116.986, "dur": 42.223, + "args": { + "External id": 993101,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942236172.429, "dur": 1.873, + "args": { + "External id": 993102,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942236181.374, "dur": 14.757, + "args": { + "External id": 993103,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942236203.249, "dur": 3.535, + "args": { + "External id": 993104,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942236212.323, "dur": 14.702, + "args": { + "External id": 993105,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942236233.615, "dur": 1.100, + "args": { + "External id": 993106,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942236240.022, "dur": 13.056, + "args": { + "External id": 993107,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942236258.223, "dur": 0.873, + "args": { + "External id": 993108,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942236263.803, "dur": 12.835, + "args": { + "External id": 993109,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942236281.563, "dur": 1.237, + "args": { + "External id": 993110,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942236287.659, "dur": 12.116, + "args": { + "External id": 993111,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942236304.909, "dur": 1.527, + "args": { + "External id": 993112,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942236311.386, "dur": 13.109, + "args": { + "External id": 993113,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942236332.128, "dur": 0.915, + "args": { + "External id": 993114,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942236338.479, "dur": 11.653, + "args": { + "External id": 993115,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942236355.062, "dur": 1.021, + "args": { + "External id": 993116,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942236361.123, "dur": 12.080, + "args": { + "External id": 993117,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942236479.633, "dur": 3442.381, + "args": { + "External id": 993118,"Record function id": 0, "Ev Idx": 11101 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2338708, "tid": 2379421, + "ts": 6345942236503.403, "dur": 1240.424, + "args": { + "External id": 993119,"Record function id": 0, "Ev Idx": 11102 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338708, "tid": 2379421, + "ts": 6345942236523.310, "dur": 375.621, + "args": { + "External id": 993120,"Record function id": 0, "Ev Idx": 11103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942236626.011, "dur": 5.079, + "args": { + "External id": 993121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942236634.721, "dur": 3.629, + "args": { + "External id": 993122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942236640.768, "dur": 1.364, + "args": { + "External id": 993123,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942236644.783, "dur": 1.030, + "args": { + "External id": 993124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942236647.737, "dur": 0.793, + "args": { + "External id": 993125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942236651.020, "dur": 1.183, + "args": { + "External id": 993126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942236656.937, "dur": 0.903, + "args": { + "External id": 993127,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942236659.512, "dur": 2.120, + "args": { + "External id": 993128,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942236663.821, "dur": 0.766, + "args": { + "External id": 993129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942236666.284, "dur": 3.682, + "args": { + "External id": 993130,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942236694.404, "dur": 169.991, + "args": { + "External id": 993131,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942236714.301, "dur": 144.682, + "args": { + "External id": 993132,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942236732.542, "dur": 17.670, + "args": { + "External id": 993133,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942236754.722, "dur": 73.167, + "args": { + "External id": 993134,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942236757.850, "dur": 69.600, + "args": { + "External id": 993135,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942236762.701, "dur": 6.087, + "args": { + "External id": 993136,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942236771.145, "dur": 55.622, + "args": { + "External id": 993137,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11120 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2338708, "tid": 2379421, + "ts": 6345942237006.242, "dur": 728.462, + "args": { + "External id": 993138,"Record function id": 0, "Ev Idx": 11121 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338708, "tid": 2379421, + "ts": 6345942237049.334, "dur": 670.157, + "args": { + "External id": 993139,"Record function id": 0, "Ev Idx": 11122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942237167.080, "dur": 8.001, + "args": { + "External id": 993140,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345942237193.917, "dur": 33.902, + "args": { + "External id": 993141,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237200.305, "dur": 1.986, + "args": { + "External id": 993142,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237204.264, "dur": 0.893, + "args": { + "External id": 993143,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237207.697, "dur": 0.719, + "args": { + "External id": 993144,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237210.576, "dur": 0.451, + "args": { + "External id": 993145,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237212.113, "dur": 2.478, + "args": { + "External id": 993146,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237216.385, "dur": 0.873, + "args": { + "External id": 993147,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237218.961, "dur": 0.631, + "args": { + "External id": 993148,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237220.634, "dur": 0.451, + "args": { + "External id": 993149,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237222.899, "dur": 0.440, + "args": { + "External id": 993150,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942237240.359, "dur": 49.673, + "args": { + "External id": 993151,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345942237327.899, "dur": 125.994, + "args": { + "External id": 993152,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942237340.823, "dur": 3.457, + "args": { + "External id": 993153,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345942237350.464, "dur": 11.757, + "args": { + "External id": 993154,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345942237355.590, "dur": 6.166, + "args": { + "External id": 993155,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237359.765, "dur": 0.659, + "args": { + "External id": 993156,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345942237370.464, "dur": 27.610, + "args": { + "External id": 993157,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237373.149, "dur": 0.471, + "args": { + "External id": 993158,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237375.300, "dur": 3.129, + "args": { + "External id": 993159,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237380.457, "dur": 0.547, + "args": { + "External id": 993160,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237383.053, "dur": 0.589, + "args": { + "External id": 993161,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237384.735, "dur": 0.786, + "args": { + "External id": 993162,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237387.204, "dur": 0.416, + "args": { + "External id": 993163,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237389.486, "dur": 0.418, + "args": { + "External id": 993164,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237390.883, "dur": 0.492, + "args": { + "External id": 993165,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942237393.442, "dur": 0.468, + "args": { + "External id": 993166,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942237411.250, "dur": 31.980, + "args": { + "External id": 993167,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942237506.462, "dur": 132.134, + "args": { + "External id": 993168,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942237534.639, "dur": 100.024, + "args": { + "External id": 993169,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11152, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942237545.940, "dur": 83.629, + "args": { + "External id": 993170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942237658.364, "dur": 2.348, + "args": { + "External id": 993171,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11154, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942237753.203, "dur": 2146.121, + "args": { + "External id": 993172,"Sequence number": 10552479, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11155 + } + }, + { + "ph": "f", "id": 440, "pid": 2338708, "tid": 2379421, "ts": 6345942237753.203, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942237941.477, "dur": 180.320, + "args": { + "External id": 993173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942238180.917, "dur": 47.605, + "args": { + "External id": 993174,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345942238252.592, "dur": 65.397, + "args": { + "External id": 993175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942238329.988, "dur": 38.012, + "args": { + "External id": 993176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942238375.178, "dur": 38.358, + "args": { + "External id": 993177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942238420.858, "dur": 33.047, + "args": { + "External id": 993178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942238464.315, "dur": 35.591, + "args": { + "External id": 993179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942238529.203, "dur": 29.232, + "args": { + "External id": 993180,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942238581.668, "dur": 34.639, + "args": { + "External id": 993181,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942238641.858, "dur": 30.229, + "args": { + "External id": 993182,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942238687.450, "dur": 19.988, + "args": { + "External id": 993183,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942238720.429, "dur": 44.583, + "args": { + "External id": 993184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942238769.725, "dur": 38.624, + "args": { + "External id": 993185,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345942238846.902, "dur": 400.367, + "args": { + "External id": 993186,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942238956.291, "dur": 11.136, + "args": { + "External id": 993187,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942238975.086, "dur": 3.592, + "args": { + "External id": 993188,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942238980.131, "dur": 2.747, + "args": { + "External id": 993189,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942238984.218, "dur": 5.301, + "args": { + "External id": 993190,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942239101.525, "dur": 6.980, + "args": { + "External id": 993191,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942239104.290, "dur": 3.761, + "args": { + "External id": 993192,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942239110.624, "dur": 39.882, + "args": { + "External id": 993193,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942239117.981, "dur": 2.057, + "args": { + "External id": 993194,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942239152.684, "dur": 2.695, + "args": { + "External id": 993195,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942239154.317, "dur": 0.973, + "args": { + "External id": 993196,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942239157.205, "dur": 22.065, + "args": { + "External id": 993197,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942239159.642, "dur": 0.740, + "args": { + "External id": 993198,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942239294.036, "dur": 35.196, + "args": { + "External id": 993199,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942239348.437, "dur": 21.304, + "args": { + "External id": 993200,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942239380.119, "dur": 61.493, + "args": { + "External id": 993201,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942239450.580, "dur": 47.020, + "args": { + "External id": 993202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942239507.886, "dur": 26.563, + "args": { + "External id": 993203,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942239544.101, "dur": 37.685, + "args": { + "External id": 993204,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942239590.359, "dur": 33.558, + "args": { + "External id": 993205,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942239633.132, "dur": 37.309, + "args": { + "External id": 993206,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345942239690.626, "dur": 29.115, + "args": { + "External id": 993207,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942239737.185, "dur": 28.525, + "args": { + "External id": 993208,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942239781.974, "dur": 19.567, + "args": { + "External id": 993209,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942239816.323, "dur": 15.827, + "args": { + "External id": 993210,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345942239846.475, "dur": 16.652, + "args": { + "External id": 993211,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942239950.513, "dur": 18.210, + "args": { + "External id": 993212,"Record function id": 0, "Ev Idx": 11195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942239954.640, "dur": 13.031, + "args": { + "External id": 993213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942239960.189, "dur": 6.266, + "args": { + "External id": 993214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942239962.192, "dur": 4.130, + "args": { + "External id": 993215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942239973.454, "dur": 6.273, + "args": { + "External id": 993216,"Record function id": 0, "Ev Idx": 11199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942239975.097, "dur": 3.923, + "args": { + "External id": 993217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942239976.077, "dur": 2.135, + "args": { + "External id": 993218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942239977.123, "dur": 0.964, + "args": { + "External id": 993219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942239983.837, "dur": 5.701, + "args": { + "External id": 993220,"Record function id": 0, "Ev Idx": 11203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942239985.404, "dur": 3.655, + "args": { + "External id": 993221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942239986.501, "dur": 1.958, + "args": { + "External id": 993222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942239987.387, "dur": 0.994, + "args": { + "External id": 993223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942239993.643, "dur": 4.277, + "args": { + "External id": 993224,"Record function id": 0, "Ev Idx": 11207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942239994.777, "dur": 2.548, + "args": { + "External id": 993225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942239995.431, "dur": 1.432, + "args": { + "External id": 993226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942239995.971, "dur": 0.818, + "args": { + "External id": 993227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942240001.561, "dur": 4.316, + "args": { + "External id": 993228,"Record function id": 0, "Ev Idx": 11211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942240002.927, "dur": 2.445, + "args": { + "External id": 993229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942240003.460, "dur": 1.390, + "args": { + "External id": 993230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942240003.793, "dur": 0.979, + "args": { + "External id": 993231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942240033.761, "dur": 10.316, + "args": { + "External id": 993232,"Record function id": 0, "Ev Idx": 11215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942240035.869, "dur": 7.423, + "args": { + "External id": 993233,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942240037.027, "dur": 5.260, + "args": { + "External id": 993234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942240038.098, "dur": 3.922, + "args": { + "External id": 993235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942240048.187, "dur": 5.016, + "args": { + "External id": 993236,"Record function id": 0, "Ev Idx": 11219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942240049.677, "dur": 2.997, + "args": { + "External id": 993237,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942240050.321, "dur": 1.536, + "args": { + "External id": 993238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942240050.662, "dur": 1.116, + "args": { + "External id": 993239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942240095.508, "dur": 8.189, + "args": { + "External id": 993240,"Record function id": 0, "Ev Idx": 11223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942240097.761, "dur": 4.953, + "args": { + "External id": 993241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942240099.282, "dur": 2.441, + "args": { + "External id": 993242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942240099.974, "dur": 1.479, + "args": { + "External id": 993243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942240108.138, "dur": 5.310, + "args": { + "External id": 993244,"Record function id": 0, "Ev Idx": 11227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942240109.419, "dur": 3.535, + "args": { + "External id": 993245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942240110.232, "dur": 1.996, + "args": { + "External id": 993246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942240110.871, "dur": 1.212, + "args": { + "External id": 993247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942240118.751, "dur": 60659.110, + "args": { + "External id": 993248,"Record function id": 0, "Sequence number": 10552478, "Fwd thread id": 1, "Ev Idx": 11231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942240120.659, "dur": 60646.066, + "args": { + "External id": 993249,"Sequence number": 10552478, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11232 + } + }, + { + "ph": "f", "id": 441, "pid": 2338708, "tid": 2379421, "ts": 6345942240120.659, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2338708, "tid": 2379421, + "ts": 6345942240159.713, "dur": 48.927, + "args": { + "External id": 993250,"Record function id": 0, "Ev Idx": 11233 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2338708, "tid": 2379421, + "ts": 6345942240218.608, "dur": 80.676, + "args": { + "External id": 993251,"Record function id": 0, "Ev Idx": 11234 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2338708, "tid": 2379421, + "ts": 6345942240306.105, "dur": 60451.077, + "args": { + "External id": 993252,"Record function id": 0, "Ev Idx": 11235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942240413.037, "dur": 8.427, + "args": { + "External id": 993253,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942240433.601, "dur": 5.230, + "args": { + "External id": 993254,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942240455.049, "dur": 59100.677, + "args": { + "External id": 993255,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942240473.837, "dur": 59067.232, + "args": { + "External id": 993256,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942240594.732, "dur": 21.181, + "args": { + "External id": 993257,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942240638.938, "dur": 58852.966, + "args": { + "External id": 993258,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942240642.387, "dur": 58848.244, + "args": { + "External id": 993259,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942240648.129, "dur": 8.854, + "args": { + "External id": 993260,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942240659.695, "dur": 58824.981, + "args": { + "External id": 993261,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942299683.270, "dur": 13.229, + "args": { + "External id": 993262,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942299686.890, "dur": 9.031, + "args": { + "External id": 993263,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942299731.998, "dur": 504.515, + "args": { + "External id": 993264,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942299766.665, "dur": 463.117, + "args": { + "External id": 993265,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11248, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942299779.445, "dur": 442.880, + "args": { + "External id": 993266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942300264.950, "dur": 3.235, + "args": { + "External id": 993267,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11250, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942300351.021, "dur": 7.985, + "args": { + "External id": 993268,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942300373.318, "dur": 41.149, + "args": { + "External id": 993269,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942300426.779, "dur": 1.591, + "args": { + "External id": 993270,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942300435.034, "dur": 13.623, + "args": { + "External id": 993271,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942300455.536, "dur": 3.525, + "args": { + "External id": 993272,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942300464.182, "dur": 14.437, + "args": { + "External id": 993273,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942300492.280, "dur": 1.233, + "args": { + "External id": 993274,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942300498.850, "dur": 13.629, + "args": { + "External id": 993275,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942300518.135, "dur": 1.143, + "args": { + "External id": 993276,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942300523.666, "dur": 14.432, + "args": { + "External id": 993277,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942300543.327, "dur": 1.201, + "args": { + "External id": 993278,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942300549.020, "dur": 14.738, + "args": { + "External id": 993279,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942300569.264, "dur": 1.361, + "args": { + "External id": 993280,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942300612.992, "dur": 15.250, + "args": { + "External id": 993281,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942300636.801, "dur": 1.368, + "args": { + "External id": 993282,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942300642.971, "dur": 14.273, + "args": { + "External id": 993283,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942300662.610, "dur": 0.982, + "args": { + "External id": 993284,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942300669.052, "dur": 14.359, + "args": { + "External id": 993285,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942300795.692, "dur": 3440.688, + "args": { + "External id": 993286,"Record function id": 0, "Ev Idx": 11269 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2338708, "tid": 2379421, + "ts": 6345942300821.213, "dur": 1301.817, + "args": { + "External id": 993287,"Record function id": 0, "Ev Idx": 11270 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338708, "tid": 2379421, + "ts": 6345942300839.562, "dur": 444.585, + "args": { + "External id": 993288,"Record function id": 0, "Ev Idx": 11271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942300941.726, "dur": 5.294, + "args": { + "External id": 993289,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942300950.813, "dur": 3.047, + "args": { + "External id": 993290,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942300956.607, "dur": 0.938, + "args": { + "External id": 993291,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942300959.648, "dur": 0.912, + "args": { + "External id": 993292,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942300962.771, "dur": 1.108, + "args": { + "External id": 993293,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942300967.481, "dur": 0.918, + "args": { + "External id": 993294,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942300970.361, "dur": 0.753, + "args": { + "External id": 993295,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942300972.778, "dur": 2.096, + "args": { + "External id": 993296,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942300976.480, "dur": 0.762, + "args": { + "External id": 993297,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942300980.678, "dur": 3.471, + "args": { + "External id": 993298,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942301006.627, "dur": 235.956, + "args": { + "External id": 993299,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942301045.491, "dur": 190.324, + "args": { + "External id": 993300,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942301102.173, "dur": 18.928, + "args": { + "External id": 993301,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942301125.802, "dur": 81.027, + "args": { + "External id": 993302,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942301129.109, "dur": 77.268, + "args": { + "External id": 993303,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301133.537, "dur": 7.877, + "args": { + "External id": 993304,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942301143.366, "dur": 62.337, + "args": { + "External id": 993305,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11288 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2338708, "tid": 2379421, + "ts": 6345942301394.442, "dur": 718.851, + "args": { + "External id": 993306,"Record function id": 0, "Ev Idx": 11289 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338708, "tid": 2379421, + "ts": 6345942301414.595, "dur": 678.429, + "args": { + "External id": 993307,"Record function id": 0, "Ev Idx": 11290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942301488.669, "dur": 5.876, + "args": { + "External id": 993308,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345942301512.868, "dur": 33.214, + "args": { + "External id": 993309,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301519.369, "dur": 2.094, + "args": { + "External id": 993310,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301524.113, "dur": 1.035, + "args": { + "External id": 993311,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301527.200, "dur": 0.704, + "args": { + "External id": 993312,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301529.659, "dur": 0.534, + "args": { + "External id": 993313,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301531.391, "dur": 2.333, + "args": { + "External id": 993314,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301535.606, "dur": 0.432, + "args": { + "External id": 993315,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301538.046, "dur": 0.363, + "args": { + "External id": 993316,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301539.709, "dur": 0.313, + "args": { + "External id": 993317,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301541.509, "dur": 0.640, + "args": { + "External id": 993318,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942301558.833, "dur": 46.149, + "args": { + "External id": 993319,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345942301641.703, "dur": 125.215, + "args": { + "External id": 993320,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942301654.031, "dur": 3.782, + "args": { + "External id": 993321,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345942301663.749, "dur": 11.885, + "args": { + "External id": 993322,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345942301668.669, "dur": 6.485, + "args": { + "External id": 993323,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301672.570, "dur": 1.015, + "args": { + "External id": 993324,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345942301683.749, "dur": 27.400, + "args": { + "External id": 993325,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301686.323, "dur": 0.706, + "args": { + "External id": 993326,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301688.859, "dur": 3.040, + "args": { + "External id": 993327,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301693.363, "dur": 0.800, + "args": { + "External id": 993328,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301695.764, "dur": 0.385, + "args": { + "External id": 993329,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301697.425, "dur": 0.651, + "args": { + "External id": 993330,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301700.085, "dur": 0.593, + "args": { + "External id": 993331,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301702.265, "dur": 0.399, + "args": { + "External id": 993332,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301703.876, "dur": 0.447, + "args": { + "External id": 993333,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942301705.711, "dur": 0.397, + "args": { + "External id": 993334,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942301723.716, "dur": 32.379, + "args": { + "External id": 993335,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942301821.722, "dur": 131.205, + "args": { + "External id": 993336,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942301851.783, "dur": 97.187, + "args": { + "External id": 993337,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11320, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942301862.424, "dur": 81.634, + "args": { + "External id": 993338,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942301971.606, "dur": 2.127, + "args": { + "External id": 993339,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11322, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942302133.316, "dur": 2079.862, + "args": { + "External id": 993340,"Sequence number": 10552477, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11323 + } + }, + { + "ph": "f", "id": 442, "pid": 2338708, "tid": 2379421, "ts": 6345942302133.316, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942302268.468, "dur": 124.339, + "args": { + "External id": 993341,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942302442.607, "dur": 49.456, + "args": { + "External id": 993342,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345942302515.212, "dur": 57.521, + "args": { + "External id": 993343,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942302584.680, "dur": 37.194, + "args": { + "External id": 993344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942302629.559, "dur": 37.210, + "args": { + "External id": 993345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942302674.713, "dur": 31.923, + "args": { + "External id": 993346,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942302717.306, "dur": 34.310, + "args": { + "External id": 993347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942302782.490, "dur": 27.848, + "args": { + "External id": 993348,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942302832.349, "dur": 35.953, + "args": { + "External id": 993349,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942302893.701, "dur": 25.011, + "args": { + "External id": 993350,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942302934.009, "dur": 17.216, + "args": { + "External id": 993351,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942302962.871, "dur": 42.897, + "args": { + "External id": 993352,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942303027.841, "dur": 85.381, + "args": { + "External id": 993353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345942303167.101, "dur": 319.204, + "args": { + "External id": 993354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942303271.376, "dur": 12.661, + "args": { + "External id": 993355,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942303287.080, "dur": 3.469, + "args": { + "External id": 993356,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942303292.401, "dur": 2.524, + "args": { + "External id": 993357,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942303295.988, "dur": 4.626, + "args": { + "External id": 993358,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942303365.818, "dur": 6.559, + "args": { + "External id": 993359,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942303368.405, "dur": 3.693, + "args": { + "External id": 993360,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942303374.641, "dur": 39.215, + "args": { + "External id": 993361,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942303382.191, "dur": 2.059, + "args": { + "External id": 993362,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942303415.882, "dur": 2.118, + "args": { + "External id": 993363,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942303416.994, "dur": 0.887, + "args": { + "External id": 993364,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942303419.403, "dur": 18.338, + "args": { + "External id": 993365,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942303421.972, "dur": 0.551, + "args": { + "External id": 993366,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942303529.141, "dur": 32.825, + "args": { + "External id": 993367,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942303581.179, "dur": 21.863, + "args": { + "External id": 993368,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942303613.462, "dur": 57.954, + "args": { + "External id": 993369,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942303680.169, "dur": 49.209, + "args": { + "External id": 993370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942303739.045, "dur": 28.019, + "args": { + "External id": 993371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942303776.107, "dur": 38.849, + "args": { + "External id": 993372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942303822.739, "dur": 32.579, + "args": { + "External id": 993373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942303863.556, "dur": 35.930, + "args": { + "External id": 993374,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345942303920.354, "dur": 29.300, + "args": { + "External id": 993375,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942303967.445, "dur": 28.455, + "args": { + "External id": 993376,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942304034.260, "dur": 64.129, + "args": { + "External id": 993377,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942304121.726, "dur": 18.232, + "args": { + "External id": 993378,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345942304154.422, "dur": 19.395, + "args": { + "External id": 993379,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304263.224, "dur": 18.820, + "args": { + "External id": 993380,"Record function id": 0, "Ev Idx": 11363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304267.244, "dur": 13.813, + "args": { + "External id": 993381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304273.260, "dur": 6.843, + "args": { + "External id": 993382,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304275.247, "dur": 4.714, + "args": { + "External id": 993383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304286.692, "dur": 5.612, + "args": { + "External id": 993384,"Record function id": 0, "Ev Idx": 11367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304288.071, "dur": 3.708, + "args": { + "External id": 993385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304288.938, "dur": 2.202, + "args": { + "External id": 993386,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304289.841, "dur": 1.201, + "args": { + "External id": 993387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304296.232, "dur": 5.066, + "args": { + "External id": 993388,"Record function id": 0, "Ev Idx": 11371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304297.727, "dur": 3.086, + "args": { + "External id": 993389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304298.496, "dur": 1.618, + "args": { + "External id": 993390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304299.026, "dur": 1.004, + "args": { + "External id": 993391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304305.098, "dur": 4.691, + "args": { + "External id": 993392,"Record function id": 0, "Ev Idx": 11375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304306.590, "dur": 2.727, + "args": { + "External id": 993393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304307.582, "dur": 1.239, + "args": { + "External id": 993394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304307.986, "dur": 0.758, + "args": { + "External id": 993395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304324.812, "dur": 5.767, + "args": { + "External id": 993396,"Record function id": 0, "Ev Idx": 11379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304326.328, "dur": 3.764, + "args": { + "External id": 993397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304327.826, "dur": 1.741, + "args": { + "External id": 993398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304328.800, "dur": 0.688, + "args": { + "External id": 993399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304334.294, "dur": 8.574, + "args": { + "External id": 993400,"Record function id": 0, "Ev Idx": 11383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304335.451, "dur": 6.901, + "args": { + "External id": 993401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304336.129, "dur": 5.635, + "args": { + "External id": 993402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304338.011, "dur": 3.643, + "args": { + "External id": 993403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304346.693, "dur": 5.168, + "args": { + "External id": 993404,"Record function id": 0, "Ev Idx": 11387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304348.249, "dur": 3.102, + "args": { + "External id": 993405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304349.093, "dur": 1.610, + "args": { + "External id": 993406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304349.643, "dur": 0.986, + "args": { + "External id": 993407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304356.235, "dur": 5.046, + "args": { + "External id": 993408,"Record function id": 0, "Ev Idx": 11391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304357.803, "dur": 2.996, + "args": { + "External id": 993409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304358.565, "dur": 1.702, + "args": { + "External id": 993410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304359.240, "dur": 0.935, + "args": { + "External id": 993411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304365.534, "dur": 4.871, + "args": { + "External id": 993412,"Record function id": 0, "Ev Idx": 11395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942304366.891, "dur": 3.027, + "args": { + "External id": 993413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304367.596, "dur": 1.715, + "args": { + "External id": 993414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942304368.245, "dur": 0.986, + "args": { + "External id": 993415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942304375.651, "dur": 60590.515, + "args": { + "External id": 993416,"Record function id": 0, "Sequence number": 10552476, "Fwd thread id": 1, "Ev Idx": 11399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942304377.614, "dur": 60576.987, + "args": { + "External id": 993417,"Sequence number": 10552476, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11400 + } + }, + { + "ph": "f", "id": 443, "pid": 2338708, "tid": 2379421, "ts": 6345942304377.614, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2338708, "tid": 2379421, + "ts": 6345942304412.255, "dur": 44.285, + "args": { + "External id": 993418,"Record function id": 0, "Ev Idx": 11401 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2338708, "tid": 2379421, + "ts": 6345942304465.872, "dur": 84.197, + "args": { + "External id": 993419,"Record function id": 0, "Ev Idx": 11402 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2338708, "tid": 2379421, + "ts": 6345942304557.459, "dur": 60386.544, + "args": { + "External id": 993420,"Record function id": 0, "Ev Idx": 11403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942304659.488, "dur": 7.298, + "args": { + "External id": 993421,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942304677.602, "dur": 5.143, + "args": { + "External id": 993422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942304700.205, "dur": 59149.607, + "args": { + "External id": 993423,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942304715.472, "dur": 59118.757, + "args": { + "External id": 993424,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942304837.883, "dur": 31.688, + "args": { + "External id": 993425,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942304897.782, "dur": 58890.728, + "args": { + "External id": 993426,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942304902.251, "dur": 58885.391, + "args": { + "External id": 993427,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942304912.220, "dur": 10.669, + "args": { + "External id": 993428,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942304925.244, "dur": 58859.287, + "args": { + "External id": 993429,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942363976.325, "dur": 13.930, + "args": { + "External id": 993430,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942363980.620, "dur": 8.919, + "args": { + "External id": 993431,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942364040.902, "dur": 454.937, + "args": { + "External id": 993432,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942364110.244, "dur": 379.345, + "args": { + "External id": 993433,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11416, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942364124.595, "dur": 356.696, + "args": { + "External id": 993434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942364518.448, "dur": 2.989, + "args": { + "External id": 993435,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11418, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942364596.348, "dur": 7.629, + "args": { + "External id": 993436,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942364618.149, "dur": 41.188, + "args": { + "External id": 993437,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942364672.309, "dur": 4.223, + "args": { + "External id": 993438,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942364683.168, "dur": 15.728, + "args": { + "External id": 993439,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942364705.389, "dur": 1.069, + "args": { + "External id": 993440,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942364711.585, "dur": 14.528, + "args": { + "External id": 993441,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942364732.275, "dur": 0.947, + "args": { + "External id": 993442,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942364737.745, "dur": 13.336, + "args": { + "External id": 993443,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942364755.836, "dur": 1.075, + "args": { + "External id": 993444,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942364761.173, "dur": 14.346, + "args": { + "External id": 993445,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942364780.517, "dur": 1.673, + "args": { + "External id": 993446,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942364787.114, "dur": 13.105, + "args": { + "External id": 993447,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942364805.241, "dur": 1.110, + "args": { + "External id": 993448,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942364811.285, "dur": 14.562, + "args": { + "External id": 993449,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942364830.623, "dur": 1.151, + "args": { + "External id": 993450,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942364836.756, "dur": 12.714, + "args": { + "External id": 993451,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942364856.087, "dur": 1.080, + "args": { + "External id": 993452,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942364861.976, "dur": 14.589, + "args": { + "External id": 993453,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942364982.909, "dur": 3472.613, + "args": { + "External id": 993454,"Record function id": 0, "Ev Idx": 11437 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2338708, "tid": 2379421, + "ts": 6345942365026.667, "dur": 1336.989, + "args": { + "External id": 993455,"Record function id": 0, "Ev Idx": 11438 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338708, "tid": 2379421, + "ts": 6345942365048.457, "dur": 443.719, + "args": { + "External id": 993456,"Record function id": 0, "Ev Idx": 11439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942365197.047, "dur": 8.473, + "args": { + "External id": 993457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942365210.137, "dur": 1.102, + "args": { + "External id": 993458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942365213.735, "dur": 1.271, + "args": { + "External id": 993459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942365217.604, "dur": 0.973, + "args": { + "External id": 993460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942365220.690, "dur": 0.791, + "args": { + "External id": 993461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942365223.441, "dur": 1.100, + "args": { + "External id": 993462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942365226.353, "dur": 1.331, + "args": { + "External id": 993463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942365229.244, "dur": 1.560, + "args": { + "External id": 993464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942365234.548, "dur": 3.077, + "args": { + "External id": 993465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942365239.283, "dur": 0.767, + "args": { + "External id": 993466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942365272.475, "dur": 178.634, + "args": { + "External id": 993467,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942365295.870, "dur": 149.580, + "args": { + "External id": 993468,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942365313.409, "dur": 18.079, + "args": { + "External id": 993469,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942365335.668, "dur": 80.205, + "args": { + "External id": 993470,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942365339.003, "dur": 76.501, + "args": { + "External id": 993471,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365344.025, "dur": 6.616, + "args": { + "External id": 993472,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942365352.716, "dur": 61.997, + "args": { + "External id": 993473,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11456 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2338708, "tid": 2379421, + "ts": 6345942365605.240, "dur": 747.336, + "args": { + "External id": 993474,"Record function id": 0, "Ev Idx": 11457 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338708, "tid": 2379421, + "ts": 6345942365627.690, "dur": 709.285, + "args": { + "External id": 993475,"Record function id": 0, "Ev Idx": 11458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942365704.709, "dur": 5.761, + "args": { + "External id": 993476,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345942365728.225, "dur": 33.722, + "args": { + "External id": 993477,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365734.555, "dur": 1.857, + "args": { + "External id": 993478,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365738.849, "dur": 1.091, + "args": { + "External id": 993479,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365741.648, "dur": 0.824, + "args": { + "External id": 993480,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365744.732, "dur": 2.877, + "args": { + "External id": 993481,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365749.657, "dur": 0.614, + "args": { + "External id": 993482,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365751.465, "dur": 0.691, + "args": { + "External id": 993483,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365754.072, "dur": 0.513, + "args": { + "External id": 993484,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365756.235, "dur": 0.569, + "args": { + "External id": 993485,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365758.132, "dur": 0.411, + "args": { + "External id": 993486,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942365775.070, "dur": 46.985, + "args": { + "External id": 993487,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2379421, + "ts": 6345942365858.793, "dur": 132.555, + "args": { + "External id": 993488,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 11471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942365870.174, "dur": 3.720, + "args": { + "External id": 993489,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2379421, + "ts": 6345942365879.738, "dur": 12.115, + "args": { + "External id": 993490,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2379421, + "ts": 6345942365884.670, "dur": 6.664, + "args": { + "External id": 993491,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 11474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365888.659, "dur": 0.982, + "args": { + "External id": 993492,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 11475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2379421, + "ts": 6345942365899.443, "dur": 30.803, + "args": { + "External id": 993493,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 11476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365901.616, "dur": 2.982, + "args": { + "External id": 993494,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365906.249, "dur": 0.748, + "args": { + "External id": 993495,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365909.187, "dur": 0.525, + "args": { + "External id": 993496,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365911.327, "dur": 0.401, + "args": { + "External id": 993497,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365913.268, "dur": 0.389, + "args": { + "External id": 993498,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365915.348, "dur": 0.416, + "args": { + "External id": 993499,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365917.649, "dur": 0.654, + "args": { + "External id": 993500,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365919.767, "dur": 0.299, + "args": { + "External id": 993501,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942365921.440, "dur": 2.733, + "args": { + "External id": 993502,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942365946.561, "dur": 35.692, + "args": { + "External id": 993503,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 11486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942366108.327, "dur": 141.236, + "args": { + "External id": 993504,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 11487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942366140.923, "dur": 104.438, + "args": { + "External id": 993505,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11488, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942366154.133, "dur": 86.325, + "args": { + "External id": 993506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 11489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942366267.594, "dur": 2.087, + "args": { + "External id": 993507,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11490, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942366373.228, "dur": 2058.965, + "args": { + "External id": 993508,"Sequence number": 10552475, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11491 + } + }, + { + "ph": "f", "id": 444, "pid": 2338708, "tid": 2379421, "ts": 6345942366373.228, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942366507.368, "dur": 119.722, + "args": { + "External id": 993509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942366675.155, "dur": 47.627, + "args": { + "External id": 993510,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345942366744.360, "dur": 55.238, + "args": { + "External id": 993511,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942366811.610, "dur": 37.480, + "args": { + "External id": 993512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942366856.659, "dur": 35.897, + "args": { + "External id": 993513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942366900.425, "dur": 32.836, + "args": { + "External id": 993514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942366943.341, "dur": 33.159, + "args": { + "External id": 993515,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942367004.180, "dur": 97.041, + "args": { + "External id": 993516,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942367127.758, "dur": 36.041, + "args": { + "External id": 993517,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942367193.092, "dur": 25.542, + "args": { + "External id": 993518,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942367234.657, "dur": 19.925, + "args": { + "External id": 993519,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942367266.975, "dur": 51.732, + "args": { + "External id": 993520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942367323.244, "dur": 38.179, + "args": { + "External id": 993521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345942367411.010, "dur": 309.484, + "args": { + "External id": 993522,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942367510.453, "dur": 8.708, + "args": { + "External id": 993523,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942367521.672, "dur": 2.698, + "args": { + "External id": 993524,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942367525.824, "dur": 2.604, + "args": { + "External id": 993525,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942367529.704, "dur": 4.970, + "args": { + "External id": 993526,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942367596.018, "dur": 5.991, + "args": { + "External id": 993527,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942367598.418, "dur": 3.357, + "args": { + "External id": 993528,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942367604.090, "dur": 36.438, + "args": { + "External id": 993529,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942367610.431, "dur": 1.856, + "args": { + "External id": 993530,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942367642.530, "dur": 1.958, + "args": { + "External id": 993531,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942367643.738, "dur": 0.662, + "args": { + "External id": 993532,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942367645.467, "dur": 21.604, + "args": { + "External id": 993533,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942367647.910, "dur": 0.435, + "args": { + "External id": 993534,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942367761.365, "dur": 30.526, + "args": { + "External id": 993535,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942367810.206, "dur": 18.002, + "args": { + "External id": 993536,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942367837.985, "dur": 47.537, + "args": { + "External id": 993537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942367894.026, "dur": 44.612, + "args": { + "External id": 993538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942367950.811, "dur": 25.640, + "args": { + "External id": 993539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942367983.958, "dur": 59.206, + "args": { + "External id": 993540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942368095.798, "dur": 41.915, + "args": { + "External id": 993541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942368148.123, "dur": 37.923, + "args": { + "External id": 993542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345942368208.774, "dur": 32.584, + "args": { + "External id": 993543,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942368260.137, "dur": 30.325, + "args": { + "External id": 993544,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942368307.321, "dur": 22.887, + "args": { + "External id": 993545,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942368346.073, "dur": 17.397, + "args": { + "External id": 993546,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345942368377.818, "dur": 19.111, + "args": { + "External id": 993547,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368483.521, "dur": 17.727, + "args": { + "External id": 993548,"Record function id": 0, "Ev Idx": 11531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368487.761, "dur": 12.523, + "args": { + "External id": 993549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368492.731, "dur": 6.250, + "args": { + "External id": 993550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368494.365, "dur": 4.495, + "args": { + "External id": 993551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368505.941, "dur": 4.972, + "args": { + "External id": 993552,"Record function id": 0, "Ev Idx": 11535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368507.413, "dur": 2.975, + "args": { + "External id": 993553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368508.216, "dur": 1.612, + "args": { + "External id": 993554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368508.885, "dur": 0.844, + "args": { + "External id": 993555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368514.734, "dur": 4.729, + "args": { + "External id": 993556,"Record function id": 0, "Ev Idx": 11539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368516.451, "dur": 2.527, + "args": { + "External id": 993557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368517.208, "dur": 1.180, + "args": { + "External id": 993558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368517.575, "dur": 0.735, + "args": { + "External id": 993559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368523.261, "dur": 5.300, + "args": { + "External id": 993560,"Record function id": 0, "Ev Idx": 11543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368524.857, "dur": 3.165, + "args": { + "External id": 993561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368525.524, "dur": 1.819, + "args": { + "External id": 993562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368526.245, "dur": 0.979, + "args": { + "External id": 993563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368532.360, "dur": 4.604, + "args": { + "External id": 993564,"Record function id": 0, "Ev Idx": 11547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368533.721, "dur": 2.763, + "args": { + "External id": 993565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368534.708, "dur": 1.239, + "args": { + "External id": 993566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368535.121, "dur": 0.748, + "args": { + "External id": 993567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368540.652, "dur": 7.392, + "args": { + "External id": 993568,"Record function id": 0, "Ev Idx": 11551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368542.374, "dur": 5.164, + "args": { + "External id": 993569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368542.956, "dur": 3.945, + "args": { + "External id": 993570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368543.261, "dur": 3.560, + "args": { + "External id": 993571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368551.955, "dur": 5.748, + "args": { + "External id": 993572,"Record function id": 0, "Ev Idx": 11555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368553.694, "dur": 3.494, + "args": { + "External id": 993573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368554.293, "dur": 2.345, + "args": { + "External id": 993574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368555.611, "dur": 0.910, + "args": { + "External id": 993575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368562.339, "dur": 4.547, + "args": { + "External id": 993576,"Record function id": 0, "Ev Idx": 11559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368563.675, "dur": 2.698, + "args": { + "External id": 993577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368564.328, "dur": 1.297, + "args": { + "External id": 993578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368564.835, "dur": 0.682, + "args": { + "External id": 993579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368570.949, "dur": 4.222, + "args": { + "External id": 993580,"Record function id": 0, "Ev Idx": 11563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942368572.144, "dur": 2.521, + "args": { + "External id": 993581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368572.763, "dur": 1.143, + "args": { + "External id": 993582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942368573.234, "dur": 0.576, + "args": { + "External id": 993583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942368580.413, "dur": 60662.730, + "args": { + "External id": 993584,"Record function id": 0, "Sequence number": 10552474, "Fwd thread id": 1, "Ev Idx": 11567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942368582.151, "dur": 60649.215, + "args": { + "External id": 993585,"Sequence number": 10552474, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11568 + } + }, + { + "ph": "f", "id": 445, "pid": 2338708, "tid": 2379421, "ts": 6345942368582.151, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2338708, "tid": 2379421, + "ts": 6345942368614.881, "dur": 42.569, + "args": { + "External id": 993586,"Record function id": 0, "Ev Idx": 11569 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2338708, "tid": 2379421, + "ts": 6345942368665.844, "dur": 85.234, + "args": { + "External id": 993587,"Record function id": 0, "Ev Idx": 11570 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2338708, "tid": 2379421, + "ts": 6345942368757.519, "dur": 60464.283, + "args": { + "External id": 993588,"Record function id": 0, "Ev Idx": 11571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942368859.533, "dur": 7.431, + "args": { + "External id": 993589,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942368878.050, "dur": 5.305, + "args": { + "External id": 993590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942368900.468, "dur": 59136.202, + "args": { + "External id": 993591,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942368916.208, "dur": 59105.291, + "args": { + "External id": 993592,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942369109.239, "dur": 23.848, + "args": { + "External id": 993593,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942369166.935, "dur": 58793.801, + "args": { + "External id": 993594,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942369170.548, "dur": 58789.088, + "args": { + "External id": 993595,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942369177.942, "dur": 16.528, + "args": { + "External id": 993596,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942369200.741, "dur": 58754.620, + "args": { + "External id": 993597,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942428190.021, "dur": 14.324, + "args": { + "External id": 993598,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942428193.828, "dur": 9.892, + "args": { + "External id": 993599,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942428238.590, "dur": 459.040, + "args": { + "External id": 993600,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942428271.845, "dur": 419.490, + "args": { + "External id": 993601,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11584, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942428285.420, "dur": 399.170, + "args": { + "External id": 993602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942428723.192, "dur": 2.584, + "args": { + "External id": 993603,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11586, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942428797.506, "dur": 7.556, + "args": { + "External id": 993604,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942428819.280, "dur": 43.038, + "args": { + "External id": 993605,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942428874.877, "dur": 1.890, + "args": { + "External id": 993606,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942428889.587, "dur": 15.918, + "args": { + "External id": 993607,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942428912.086, "dur": 1.736, + "args": { + "External id": 993608,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942428919.160, "dur": 15.372, + "args": { + "External id": 993609,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942428942.032, "dur": 1.180, + "args": { + "External id": 993610,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942428947.826, "dur": 13.634, + "args": { + "External id": 993611,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942428966.548, "dur": 3.540, + "args": { + "External id": 993612,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942428974.156, "dur": 13.974, + "args": { + "External id": 993613,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942428993.018, "dur": 1.114, + "args": { + "External id": 993614,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942428998.498, "dur": 31.935, + "args": { + "External id": 993615,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942429041.479, "dur": 1.985, + "args": { + "External id": 993616,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942429048.687, "dur": 49.278, + "args": { + "External id": 993617,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942429107.115, "dur": 1.915, + "args": { + "External id": 993618,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942429114.860, "dur": 13.176, + "args": { + "External id": 993619,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942429133.240, "dur": 1.179, + "args": { + "External id": 993620,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942429138.673, "dur": 11.347, + "args": { + "External id": 993621,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942429262.579, "dur": 2639.514, + "args": { + "External id": 993622,"Record function id": 0, "Ev Idx": 11605 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2338708, "tid": 2379421, + "ts": 6345942429286.796, "dur": 509.484, + "args": { + "External id": 993623,"Record function id": 0, "Ev Idx": 11606 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338708, "tid": 2379421, + "ts": 6345942429303.412, "dur": 375.696, + "args": { + "External id": 993624,"Record function id": 0, "Ev Idx": 11607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942429409.628, "dur": 5.379, + "args": { + "External id": 993625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942429418.690, "dur": 1.390, + "args": { + "External id": 993626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942429422.374, "dur": 1.471, + "args": { + "External id": 993627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942429425.846, "dur": 3.687, + "args": { + "External id": 993628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942429431.642, "dur": 1.191, + "args": { + "External id": 993629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942429436.054, "dur": 1.058, + "args": { + "External id": 993630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942429439.127, "dur": 1.376, + "args": { + "External id": 993631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942429441.984, "dur": 2.141, + "args": { + "External id": 993632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942429446.086, "dur": 1.073, + "args": { + "External id": 993633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942429450.873, "dur": 1.075, + "args": { + "External id": 993634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 11617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942429472.846, "dur": 170.824, + "args": { + "External id": 993635,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942429492.001, "dur": 145.793, + "args": { + "External id": 993636,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 11619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942429508.681, "dur": 17.891, + "args": { + "External id": 993637,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942429530.931, "dur": 77.697, + "args": { + "External id": 993638,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 11621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942429534.061, "dur": 74.156, + "args": { + "External id": 993639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 11622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942429538.939, "dur": 9.208, + "args": { + "External id": 993640,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942429550.120, "dur": 57.242, + "args": { + "External id": 993641,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 11624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942429804.972, "dur": 2071.976, + "args": { + "External id": 993642,"Sequence number": 10552473, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11625 + } + }, + { + "ph": "f", "id": 446, "pid": 2338708, "tid": 2379421, "ts": 6345942429804.972, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942429934.176, "dur": 184.019, + "args": { + "External id": 993643,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [14336, 1], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 11626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942430170.869, "dur": 46.252, + "args": { + "External id": 993644,"kernel_hash": "cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/ez/cezlgti4aqqs6gxe4vabqpc2raldwbag3oekp7pddiybbginljff.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [32768, 14336], [8, 4096, 14336], [8, 4096, 14336], [8, 4096, 14336], []], "Ev Idx": 11627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2338708, "tid": 2379421, + "ts": 6345942430241.198, "dur": 68.810, + "args": { + "External id": 993645,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 4096], [0, 14336, 1], [58720256, 14336, 1]], "Input Dims": [[1, 4096, 32768], [1, 32768, 14336], [1, 4096, 14336]], "Ev Idx": 11628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942430326.040, "dur": 41.102, + "args": { + "External id": 993646,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942430376.189, "dur": 40.541, + "args": { + "External id": 993647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942430426.099, "dur": 33.742, + "args": { + "External id": 993648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 14336], [4096, 1], [4096, 1]], "Input Dims": [[14336, 32768], [32768, 4096], [14336, 4096]], "Ev Idx": 11631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942430472.015, "dur": 34.446, + "args": { + "External id": 993649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 11632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942430532.510, "dur": 27.149, + "args": { + "External id": 993650,"kernel_hash": "czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/zb/czbnlqfxeg3kj7bddl4xtzyvy554z3tz3c5ocebr43lp2lapjdof.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 11633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942430579.284, "dur": 34.315, + "args": { + "External id": 993651,"kernel_hash": "clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/lr/clrl3oqqbm2bm6lrvgjdmmtb7mufwjay2fyqwkfduie6iboqsamj.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942430637.157, "dur": 21.765, + "args": { + "External id": 993652,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942430674.968, "dur": 16.939, + "args": { + "External id": 993653,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942430704.805, "dur": 45.296, + "args": { + "External id": 993654,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942430754.461, "dur": 39.402, + "args": { + "External id": 993655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2338708, "tid": 2379421, + "ts": 6345942430829.321, "dur": 356.077, + "args": { + "External id": 993656,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[16777216, 4096, 128, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [16777216, 4096, 128, 1], [131072, 4096, 1], [16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [8, 4096, 32, 128], [8, 32, 4096], [8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 11639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942430923.334, "dur": 7.573, + "args": { + "External id": 993657,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942430933.284, "dur": 3.108, + "args": { + "External id": 993658,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942430937.747, "dur": 2.719, + "args": { + "External id": 993659,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942430941.982, "dur": 5.361, + "args": { + "External id": 993660,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942430994.109, "dur": 6.091, + "args": { + "External id": 993661,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942430996.325, "dur": 3.611, + "args": { + "External id": 993662,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942431002.319, "dur": 97.260, + "args": { + "External id": 993663,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942431028.552, "dur": 2.685, + "args": { + "External id": 993664,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2379421, + "ts": 6345942431103.539, "dur": 2.856, + "args": { + "External id": 993665,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942431104.956, "dur": 1.247, + "args": { + "External id": 993666,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 4, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 11649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2379421, + "ts": 6345942431107.592, "dur": 20.040, + "args": { + "External id": 993667,"Record function id": 0, "Concrete Inputs": ["", "[3]", "False", "", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 512, 128, 1], [], [], [], [4194304, 1024, 128, 1]], "Input Dims": [[8, 4096, 8, 4, 128], [], [], [], [8, 4096, 8, 128]], "Ev Idx": 11650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942431111.854, "dur": 0.741, + "args": { + "External id": 993668,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 1, 128]", "[4194304, 1024, 128, 0, 1]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], []], "Ev Idx": 11651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942431229.051, "dur": 34.251, + "args": { + "External id": 993669,"kernel_hash": "c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/2y/c2y6fexkvtrqp7nvmfy6u6wybmfvzfzht6chq5ixajfum2wi56ya.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942431282.614, "dur": 19.940, + "args": { + "External id": 993670,"kernel_hash": "cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/in/cin4gmpgnljyricmbo7pva7m3nul2pofxyy6rnjk7qjpl2za4rso.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942431312.281, "dur": 58.812, + "args": { + "External id": 993671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942431379.433, "dur": 47.788, + "args": { + "External id": 993672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942431439.523, "dur": 27.994, + "args": { + "External id": 993673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 1024], [4096, 1], [4096, 1]], "Input Dims": [[1024, 32768], [32768, 4096], [1024, 4096]], "Ev Idx": 11656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942431475.712, "dur": 38.134, + "args": { + "External id": 993674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1024, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 1024], [1024, 4096], [32768, 4096]], "Ev Idx": 11657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942431521.919, "dur": 34.892, + "args": { + "External id": 993675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 4096], [4096, 1], [4096, 1]], "Input Dims": [[4096, 32768], [32768, 4096], [4096, 4096]], "Ev Idx": 11658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2379421, + "ts": 6345942431566.233, "dur": 55.507, + "args": { + "External id": 993676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 11659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2338708, "tid": 2379421, + "ts": 6345942431652.201, "dur": 30.226, + "args": { + "External id": 993677,"kernel_hash": "c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/5n/c5nnc7eo2glmh5wagyfoyxchodpvry2dwcvwy43m77ktufd2xs6k.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [32768, 4096], []], "Ev Idx": 11660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942431701.759, "dur": 32.238, + "args": { + "External id": 993678,"kernel_hash": "cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "32768", "4096", "1", "249", "132", "True", "4096", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/li/cliblwcrw53n5x3h3zub7w73fh2qsdkbp34seobab3p6ptupsbae.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [4096], [32768, 4096], [32768, 4096], [132, 4096], [32768], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 11661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942431749.578, "dur": 24.973, + "args": { + "External id": 993679,"kernel_hash": "cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd", "grid": "grid(8192,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "8192", "66"], "kernel_file": "/tmp/torchinductor_cvm/ur/cur4m2lvk2fc4b5ljgtqw2en623h2bfn7qlb3suoh4x37tixqopd.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [8192, 1, 4096], [], []], "Input Dims": [[132, 4096], [1, 4096, 2], [], []], "Ev Idx": 11662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2338708, "tid": 2379421, + "ts": 6345942431790.349, "dur": 17.342, + "args": { + "External id": 993680,"kernel_hash": "cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "2"], "kernel_file": "/tmp/torchinductor_cvm/nd/cndnpig3mpqrciwfmc3ucz44i7fimhiv6nwukkwjd4drfsavt5om.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 4096], [4096, 1], [], []], "Input Dims": [[1, 4096, 2], [1, 4096], [], []], "Ev Idx": 11663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2338708, "tid": 2379421, + "ts": 6345942431821.681, "dur": 18.829, + "args": { + "External id": 993681,"kernel_hash": "ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/cz/ccz6eolotjslg4eka74os3ylolokvpp3acdk66rzqa7g2ge6uech.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], [8, 4096, 4096], []], "Ev Idx": 11664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431928.474, "dur": 18.227, + "args": { + "External id": 993682,"Record function id": 0, "Ev Idx": 11665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431932.621, "dur": 12.842, + "args": { + "External id": 993683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942431937.667, "dur": 6.761, + "args": { + "External id": 993684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942431939.845, "dur": 4.441, + "args": { + "External id": 993685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431951.850, "dur": 6.515, + "args": { + "External id": 993686,"Record function id": 0, "Ev Idx": 11669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431953.826, "dur": 3.966, + "args": { + "External id": 993687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942431954.972, "dur": 2.150, + "args": { + "External id": 993688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942431956.158, "dur": 0.850, + "args": { + "External id": 993689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431962.508, "dur": 5.418, + "args": { + "External id": 993690,"Record function id": 0, "Ev Idx": 11673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431964.196, "dur": 3.203, + "args": { + "External id": 993691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942431965.065, "dur": 1.514, + "args": { + "External id": 993692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942431965.641, "dur": 0.854, + "args": { + "External id": 993693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 11676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431971.887, "dur": 5.726, + "args": { + "External id": 993694,"Record function id": 0, "Ev Idx": 11677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431973.898, "dur": 3.155, + "args": { + "External id": 993695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942431974.858, "dur": 1.672, + "args": { + "External id": 993696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942431975.691, "dur": 0.698, + "args": { + "External id": 993697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 11680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431981.402, "dur": 4.577, + "args": { + "External id": 993698,"Record function id": 0, "Ev Idx": 11681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431982.845, "dur": 2.630, + "args": { + "External id": 993699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942431983.633, "dur": 1.325, + "args": { + "External id": 993700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942431984.111, "dur": 0.764, + "args": { + "External id": 993701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431989.762, "dur": 5.427, + "args": { + "External id": 993702,"Record function id": 0, "Ev Idx": 11685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431991.479, "dur": 3.209, + "args": { + "External id": 993703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942431992.382, "dur": 1.686, + "args": { + "External id": 993704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942431993.175, "dur": 0.767, + "args": { + "External id": 993705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 11688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942431999.235, "dur": 7.569, + "args": { + "External id": 993706,"Record function id": 0, "Ev Idx": 11689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942432000.696, "dur": 5.575, + "args": { + "External id": 993707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942432001.300, "dur": 4.418, + "args": { + "External id": 993708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942432002.097, "dur": 3.451, + "args": { + "External id": 993709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942432030.728, "dur": 8.490, + "args": { + "External id": 993710,"Record function id": 0, "Ev Idx": 11693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942432033.415, "dur": 4.930, + "args": { + "External id": 993711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942432034.847, "dur": 2.468, + "args": { + "External id": 993712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942432035.638, "dur": 1.450, + "args": { + "External id": 993713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 11696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942432043.649, "dur": 4.962, + "args": { + "External id": 993714,"Record function id": 0, "Ev Idx": 11697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942432045.197, "dur": 2.849, + "args": { + "External id": 993715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942432045.819, "dur": 1.458, + "args": { + "External id": 993716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942432046.133, "dur": 1.046, + "args": { + "External id": 993717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 11700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942432089.829, "dur": 62009.648, + "args": { + "External id": 993718,"Record function id": 0, "Sequence number": 10552472, "Fwd thread id": 1, "Ev Idx": 11701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942432092.897, "dur": 61957.860, + "args": { + "External id": 993719,"Sequence number": 10552472, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11702 + } + }, + { + "ph": "f", "id": 447, "pid": 2338708, "tid": 2379421, "ts": 6345942432092.897, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2338708, "tid": 2379421, + "ts": 6345942432130.888, "dur": 48.456, + "args": { + "External id": 993720,"Record function id": 0, "Ev Idx": 11703 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2338708, "tid": 2379421, + "ts": 6345942432189.379, "dur": 90.226, + "args": { + "External id": 993721,"Record function id": 0, "Ev Idx": 11704 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2338708, "tid": 2379421, + "ts": 6345942432286.904, "dur": 61753.514, + "args": { + "External id": 993722,"Record function id": 0, "Ev Idx": 11705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942432396.875, "dur": 8.497, + "args": { + "External id": 993723,"Record function id": 0, "Concrete Inputs": ["[218112000]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942432417.005, "dur": 6.353, + "args": { + "External id": 993724,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 11707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942432441.201, "dur": 60518.134, + "args": { + "External id": 993725,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942432457.460, "dur": 60487.838, + "args": { + "External id": 993726,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [4096, 1], [4096, 1], [4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], [], [], [27264000, 1]], "Input Dims": [[[4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], [], [], [8, 27264000]], "Ev Idx": 11709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942432584.764, "dur": 20.346, + "args": { + "External id": 993727,"Record function id": 0, "Concrete Inputs": ["[53306]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942432630.542, "dur": 60255.712, + "args": { + "External id": 993728,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], [], []], "Ev Idx": 11711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942432633.753, "dur": 60251.068, + "args": { + "External id": 993729,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[53306], [], [], [], [], [], []], "Ev Idx": 11712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942432639.417, "dur": 8.922, + "args": { + "External id": 993730,"Record function id": 0, "Concrete Inputs": ["[53306]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942432650.835, "dur": 60224.778, + "args": { + "External id": 993731,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[53306], [53306], []], "Ev Idx": 11714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942493138.721, "dur": 14.980, + "args": { + "External id": 993732,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[218112000], [], [], [], [], []], "Ev Idx": 11715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942493142.961, "dur": 10.054, + "args": { + "External id": 993733,"Record function id": 0, "Concrete Inputs": ["[27264000]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942493189.325, "dur": 403.062, + "args": { + "External id": 993734,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[27264000], [218112000], [], [], [], []], "Ev Idx": 11717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942493223.736, "dur": 362.941, + "args": { + "External id": 993735,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 27264000, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[218112000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11718, "In msg nelems": 218112000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942493237.216, "dur": 343.574, + "args": { + "External id": 993736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[218112000]], "Ev Idx": 11719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942493616.127, "dur": 2.791, + "args": { + "External id": 993737,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11720, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942493684.398, "dur": 7.779, + "args": { + "External id": 993738,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942493706.286, "dur": 38.468, + "args": { + "External id": 993739,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942493757.421, "dur": 2.042, + "args": { + "External id": 993740,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942493766.385, "dur": 14.950, + "args": { + "External id": 993741,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942493788.177, "dur": 1.591, + "args": { + "External id": 993742,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942493794.631, "dur": 13.216, + "args": { + "External id": 993743,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942493814.695, "dur": 1.439, + "args": { + "External id": 993744,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "2621952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942493820.536, "dur": 12.292, + "args": { + "External id": 993745,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942493838.872, "dur": 3.119, + "args": { + "External id": 993746,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "3146240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942493846.672, "dur": 13.126, + "args": { + "External id": 993747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942493865.584, "dur": 1.136, + "args": { + "External id": 993748,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942493871.378, "dur": 11.583, + "args": { + "External id": 993749,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942493888.491, "dur": 1.032, + "args": { + "External id": 993750,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "5243904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942493894.158, "dur": 12.776, + "args": { + "External id": 993751,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942493912.243, "dur": 1.030, + "args": { + "External id": 993752,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "12583936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942493919.214, "dur": 11.272, + "args": { + "External id": 993753,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942493936.108, "dur": 1.242, + "args": { + "External id": 993754,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "19923968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 11737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942493941.599, "dur": 12.184, + "args": { + "External id": 993755,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942494121.430, "dur": 320.277, + "args": { + "External id": 993756,"Record function id": 0, "Sequence number": 10552471, "Fwd thread id": 1, "Ev Idx": 11739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2338708, "tid": 2379421, + "ts": 6345942494125.112, "dur": 305.796, + "args": { + "External id": 993757,"Sequence number": 10552471, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 11740 + } + }, + { + "ph": "f", "id": 448, "pid": 2338708, "tid": 2379421, "ts": 6345942494125.112, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2338708, "tid": 2379421, + "ts": 6345942494267.760, "dur": 47.519, + "args": { + "External id": 993758,"kernel_hash": "c5m7emojmcmpfnsytzs4n2vhybuspjxfkuji6biwd2ecull3vbnp", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/5m/c5m7emojmcmpfnsytzs4n2vhybuspjxfkuji6biwd2ecull3vbnp.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 11741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2338708, "tid": 2379421, + "ts": 6345942494333.426, "dur": 32.005, + "args": { + "External id": 993759,"kernel_hash": "c46xff3fh3ar7hq2aefm4fztaqpffb3u6n2xaouky6dh4l2633ed", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/46/c46xff3fh3ar7hq2aefm4fztaqpffb3u6n2xaouky6dh4l2633ed.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096, 4096], [32000, 4096], []], "Ev Idx": 11742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2338708, "tid": 2379421, + "ts": 6345942494387.072, "dur": 25.218, + "args": { + "External id": 993760,"kernel_hash": "cj4ssgwdjcekiff7t7cfceucpuq2k6lgzvcstcuozoccjjbnb5tv", "grid": "grid(131072000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "131072000"], "kernel_file": "/tmp/torchinductor_cvm/j4/cj4ssgwdjcekiff7t7cfceucpuq2k6lgzvcstcuozoccjjbnb5tv.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 11743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942494453.361, "dur": 19.104, + "args": { + "External id": 993761,"Record function id": 0, "Ev Idx": 11744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2338708, "tid": 2379421, + "ts": 6345942494456.903, "dur": 14.640, + "args": { + "External id": 993762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942494461.012, "dur": 9.337, + "args": { + "External id": 993763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2379421, + "ts": 6345942494462.913, "dur": 7.290, + "args": { + "External id": 993764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 11747 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2338708, "tid": 2379421, + "ts": 6345942494498.459, "dur": 18610.215, + "args": { + "External id": 993765,"Record function id": 0, "Ev Idx": 11748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2338708, "tid": 2379421, + "ts": 6345942494520.376, "dur": 48.532, + "args": { + "External id": 993766,"Record function id": 0, "Ev Idx": 11749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2338708, "tid": 2379421, + "ts": 6345942494575.842, "dur": 300.276, + "args": { + "External id": 993767,"Record function id": 0, "Ev Idx": 11750 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2338708, "tid": 2379421, + "ts": 6345942494884.198, "dur": 17885.836, + "args": { + "External id": 993768,"Record function id": 0, "Ev Idx": 11751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942495047.542, "dur": 47.261, + "args": { + "External id": 993769,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2379421, + "ts": 6345942495112.117, "dur": 6.404, + "args": { + "External id": 993770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 11753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942495144.837, "dur": 15684.092, + "args": { + "External id": 993771,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 11754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2338708, "tid": 2379421, + "ts": 6345942495177.493, "dur": 15634.510, + "args": { + "External id": 993772,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[], [], [], [141824512, 1]], "Input Dims": [[], [], [], [8, 141824512]], "Ev Idx": 11755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942496126.416, "dur": 25.229, + "args": { + "External id": 993773,"Record function id": 0, "Concrete Inputs": ["[277237]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2379421, + "ts": 6345942496401.177, "dur": 14348.673, + "args": { + "External id": 993774,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], [], []], "Ev Idx": 11757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2379421, + "ts": 6345942496405.917, "dur": 14342.497, + "args": { + "External id": 993775,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[277237], [], [], [], [], [], []], "Ev Idx": 11758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942496413.376, "dur": 30.694, + "args": { + "External id": 993776,"Record function id": 0, "Concrete Inputs": ["[277237]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2379421, + "ts": 6345942496450.176, "dur": 14290.525, + "args": { + "External id": 993777,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[277237], [277237], []], "Ev Idx": 11760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942510997.089, "dur": 32.186, + "args": { + "External id": 993778,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1134596096], [], [], [], [], []], "Ev Idx": 11761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2379421, + "ts": 6345942511002.339, "dur": 26.080, + "args": { + "External id": 993779,"Record function id": 0, "Concrete Inputs": ["[141824512]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511101.662, "dur": 411.604, + "args": { + "External id": 993780,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[141824512], [1134596096], [], [], [], []], "Ev Idx": 11763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942511133.037, "dur": 374.505, + "args": { + "External id": 993781,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 141824512, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[1134596096], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11764, "In msg nelems": 1134596096 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2338708, "tid": 2379421, + "ts": 6345942511146.924, "dur": 354.737, + "args": { + "External id": 993782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1134596096]], "Ev Idx": 11765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2379421, + "ts": 6345942511532.562, "dur": 2.718, + "args": { + "External id": 993783,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11766, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511607.881, "dur": 7.169, + "args": { + "External id": 993784,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511629.303, "dur": 36.009, + "args": { + "External id": 993785,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4000, 4096], [4000, 4096], []], "Ev Idx": 11768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511677.440, "dur": 1.679, + "args": { + "External id": 993786,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511686.741, "dur": 14.227, + "args": { + "External id": 993787,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511707.756, "dur": 1.252, + "args": { + "External id": 993788,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "16384512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511714.094, "dur": 13.078, + "args": { + "External id": 993789,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511734.006, "dur": 1.179, + "args": { + "External id": 993790,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "18481664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511740.106, "dur": 11.373, + "args": { + "External id": 993791,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511757.603, "dur": 1.171, + "args": { + "External id": 993792,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "19005952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511800.564, "dur": 13.483, + "args": { + "External id": 993793,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511822.569, "dur": 1.103, + "args": { + "External id": 993794,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "19530240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511828.710, "dur": 11.243, + "args": { + "External id": 993795,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511845.495, "dur": 3.682, + "args": { + "External id": 993796,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511853.680, "dur": 11.933, + "args": { + "External id": 993797,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511870.584, "dur": 0.796, + "args": { + "External id": 993798,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "21627904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511876.650, "dur": 11.283, + "args": { + "External id": 993799,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511892.941, "dur": 0.812, + "args": { + "External id": 993800,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "28967936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511899.780, "dur": 11.552, + "args": { + "External id": 993801,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511915.973, "dur": 0.737, + "args": { + "External id": 993802,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "36307968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511921.301, "dur": 11.096, + "args": { + "External id": 993803,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511937.099, "dur": 0.941, + "args": { + "External id": 993804,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511942.525, "dur": 11.497, + "args": { + "External id": 993805,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511958.746, "dur": 0.958, + "args": { + "External id": 993806,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "43648512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511964.034, "dur": 11.883, + "args": { + "External id": 993807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942511981.225, "dur": 0.856, + "args": { + "External id": 993808,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "45745664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942511986.550, "dur": 13.521, + "args": { + "External id": 993809,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512004.835, "dur": 0.925, + "args": { + "External id": 993810,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "46269952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512030.740, "dur": 16.320, + "args": { + "External id": 993811,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512088.740, "dur": 4.420, + "args": { + "External id": 993812,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "46794240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512102.169, "dur": 19.023, + "args": { + "External id": 993813,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512128.289, "dur": 1.009, + "args": { + "External id": 993814,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512133.855, "dur": 12.711, + "args": { + "External id": 993815,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512151.993, "dur": 1.033, + "args": { + "External id": 993816,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "48891904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512157.376, "dur": 13.496, + "args": { + "External id": 993817,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512177.512, "dur": 1.311, + "args": { + "External id": 993818,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "56231936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512183.697, "dur": 12.437, + "args": { + "External id": 993819,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512201.188, "dur": 1.224, + "args": { + "External id": 993820,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "63571968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512206.943, "dur": 13.644, + "args": { + "External id": 993821,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512225.960, "dur": 1.075, + "args": { + "External id": 993822,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512231.613, "dur": 12.985, + "args": { + "External id": 993823,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512250.559, "dur": 1.124, + "args": { + "External id": 993824,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "70912512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512256.788, "dur": 12.399, + "args": { + "External id": 993825,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512274.797, "dur": 1.124, + "args": { + "External id": 993826,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73009664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512280.247, "dur": 12.738, + "args": { + "External id": 993827,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512297.880, "dur": 3.032, + "args": { + "External id": 993828,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "73533952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512305.307, "dur": 11.912, + "args": { + "External id": 993829,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512322.443, "dur": 1.095, + "args": { + "External id": 993830,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "74058240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512327.426, "dur": 10.983, + "args": { + "External id": 993831,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512343.598, "dur": 1.247, + "args": { + "External id": 993832,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512348.750, "dur": 11.871, + "args": { + "External id": 993833,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512365.599, "dur": 1.128, + "args": { + "External id": 993834,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "76155904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512371.759, "dur": 10.939, + "args": { + "External id": 993835,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512387.998, "dur": 0.972, + "args": { + "External id": 993836,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "83495936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512393.564, "dur": 11.717, + "args": { + "External id": 993837,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512410.800, "dur": 0.941, + "args": { + "External id": 993838,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "90835968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512416.318, "dur": 10.890, + "args": { + "External id": 993839,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512432.209, "dur": 0.926, + "args": { + "External id": 993840,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512438.773, "dur": 11.446, + "args": { + "External id": 993841,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512456.132, "dur": 1.018, + "args": { + "External id": 993842,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "98176512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512461.489, "dur": 11.040, + "args": { + "External id": 993843,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512477.368, "dur": 3.101, + "args": { + "External id": 993844,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100273664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512485.089, "dur": 17.788, + "args": { + "External id": 993845,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512510.696, "dur": 0.915, + "args": { + "External id": 993846,"Record function id": 0, "Concrete Inputs": ["", "[128, 4096]", "[4096, 1]", "100797952"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512516.401, "dur": 12.248, + "args": { + "External id": 993847,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[128, 4096], [128, 4096], []], "Ev Idx": 11830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512534.122, "dur": 0.886, + "args": { + "External id": 993848,"Record function id": 0, "Concrete Inputs": ["", "[512, 4096]", "[4096, 1]", "101322240"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512540.089, "dur": 13.337, + "args": { + "External id": 993849,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[512, 4096], [512, 4096], []], "Ev Idx": 11832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512558.788, "dur": 1.207, + "args": { + "External id": 993850,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512564.202, "dur": 11.588, + "args": { + "External id": 993851,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512580.466, "dur": 1.078, + "args": { + "External id": 993852,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "103419904"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512586.459, "dur": 14.061, + "args": { + "External id": 993853,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512605.920, "dur": 0.953, + "args": { + "External id": 993854,"Record function id": 0, "Concrete Inputs": ["", "[1792, 4096]", "[4096, 1]", "110759936"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512611.396, "dur": 12.368, + "args": { + "External id": 993855,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[1792, 4096], [1792, 4096], []], "Ev Idx": 11838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512629.342, "dur": 0.986, + "args": { + "External id": 993856,"Record function id": 0, "Concrete Inputs": ["", "[512, 14336]", "[14336, 1]", "118099968"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512635.476, "dur": 13.934, + "args": { + "External id": 993857,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], []], "Input Dims": [[512, 14336], [512, 14336], []], "Ev Idx": 11840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512654.962, "dur": 0.934, + "args": { + "External id": 993858,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512660.468, "dur": 12.600, + "args": { + "External id": 993859,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[512], [512], []], "Ev Idx": 11842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2379421, + "ts": 6345942512679.006, "dur": 3.192, + "args": { + "External id": 993860,"Record function id": 0, "Concrete Inputs": ["", "[4000, 4096]", "[4096, 1]", "125440512"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2379421, + "ts": 6345942512686.344, "dur": 12.494, + "args": { + "External id": 993861,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4000, 4096], [4000, 4096], []], "Ev Idx": 11844 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "ProfilerStep#22527", "pid": 2338708, "tid": 2338708, + "ts": 6345936068293.533, "dur": 6474545.063, + "args": { + "External id": 972801,"Record function id": 0, "Ev Idx": 11845 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.zero_grad#AdamW.zero_grad", "pid": 2338708, "tid": 2338708, + "ts": 6345936068332.533, "dur": 794.995, + "args": { + "External id": 972802,"Record function id": 0, "Ev Idx": 11846 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2338708, "tid": 2338708, + "ts": 6345936069188.209, "dur": 2402.367, + "args": { + "External id": 972803,"Record function id": 0, "Ev Idx": 11847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936070443.949, "dur": 9.095, + "args": { + "External id": 972804,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338708, "tid": 2338708, + "ts": 6345936070475.400, "dur": 9.984, + "args": { + "External id": 972805,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 11849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936070947.061, "dur": 2.708, + "args": { + "External id": 972806,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338708, "tid": 2338708, + "ts": 6345936070961.620, "dur": 2.859, + "args": { + "External id": 972807,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 11851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936071418.643, "dur": 2.326, + "args": { + "External id": 972808,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338708, "tid": 2338708, + "ts": 6345936071426.225, "dur": 2.192, + "args": { + "External id": 972809,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 11853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936072138.473, "dur": 19.588, + "args": { + "External id": 972810,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 11854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936072150.616, "dur": 2.856, + "args": { + "External id": 972811,"Record function id": 0, "Concrete Inputs": ["", "[8, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 11855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936072159.801, "dur": 5.123, + "args": { + "External id": 972812,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 11856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936072162.239, "dur": 1.388, + "args": { + "External id": 972813,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 11857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936072197.419, "dur": 2535.435, + "args": { + "External id": 972814,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], [], []], "Ev Idx": 11858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936072206.649, "dur": 2525.550, + "args": { + "External id": 972815,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 11859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936072219.313, "dur": 12.349, + "args": { + "External id": 972816,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936072234.616, "dur": 2495.804, + "args": { + "External id": 972817,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936072245.855, "dur": 0.520, + "args": { + "External id": 972818,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 11862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936072249.273, "dur": 7.890, + "args": { + "External id": 972819,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[8, 4096], [8, 4096]], "Ev Idx": 11863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338708, "tid": 2338708, + "ts": 6345936072252.295, "dur": 4.676, + "args": { + "External id": 972820,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[8, 4096], [], []], "Ev Idx": 11864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936072255.845, "dur": 0.807, + "args": { + "External id": 972821,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345936072259.489, "dur": 164.556, + "args": { + "External id": 972822,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 11866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345936072262.484, "dur": 161.281, + "args": { + "External id": 972823,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 11867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936072265.213, "dur": 28.389, + "args": { + "External id": 972824,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 11868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936072270.618, "dur": 22.121, + "args": { + "External id": 972825,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936072294.385, "dur": 128.880, + "args": { + "External id": 972826,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936072425.990, "dur": 2301.169, + "args": { + "External id": 972827,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936074749.898, "dur": 447.439, + "args": { + "External id": 972828,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 11872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936074751.710, "dur": 444.951, + "args": { + "External id": 972829,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], []], "Ev Idx": 11873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936074759.466, "dur": 8.595, + "args": { + "External id": 972830,"Record function id": 0, "Concrete Inputs": ["[8, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936074772.096, "dur": 421.579, + "args": { + "External id": 972831,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[8, 8192], [8, 8192], []], "Ev Idx": 11875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338708, "tid": 2338708, + "ts": 6345936075228.062, "dur": 59.287, + "args": { + "External id": 972832,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936075233.730, "dur": 6.186, + "args": { + "External id": 972833,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338708, "tid": 2338708, + "ts": 6345936075242.683, "dur": 44.286, + "args": { + "External id": 972834,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 11878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345936075249.076, "dur": 8.664, + "args": { + "External id": 972835,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 11879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2338708, "tid": 2338708, + "ts": 6345936075300.013, "dur": 83.679, + "args": { + "External id": 972836,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338708, "tid": 2338708, + "ts": 6345936075308.904, "dur": 7.602, + "args": { + "External id": 972837,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 11881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075314.012, "dur": 2.131, + "args": { + "External id": 972838,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 11882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936075317.825, "dur": 3.606, + "args": { + "External id": 972839,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2338708, + "ts": 6345936075324.241, "dur": 4.911, + "args": { + "External id": 972840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[8, 4096]], "Ev Idx": 11884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338708, "tid": 2338708, + "ts": 6345936075332.227, "dur": 6.517, + "args": { + "External id": 972841,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075337.507, "dur": 0.694, + "args": { + "External id": 972842,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 11886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338708, "tid": 2338708, + "ts": 6345936075339.775, "dur": 5.493, + "args": { + "External id": 972843,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 11887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075344.172, "dur": 1.002, + "args": { + "External id": 972844,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 11888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936075347.286, "dur": 7.585, + "args": { + "External id": 972845,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [8, 1, 1, 4096]], "Ev Idx": 11889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338708, "tid": 2338708, + "ts": 6345936075351.696, "dur": 3.056, + "args": { + "External id": 972846,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 11890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075353.639, "dur": 1.011, + "args": { + "External id": 972847,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 11891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936075355.983, "dur": 26.821, + "args": { + "External id": 972848,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[8, 1, 1, 4096], [8, 1, 1, 4096], []], "Ev Idx": 11892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936075392.860, "dur": 30.946, + "args": { + "External id": 972849,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 11893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936075394.486, "dur": 29.115, + "args": { + "External id": 972850,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 11894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075400.711, "dur": 3.902, + "args": { + "External id": 972851,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936075405.394, "dur": 17.807, + "args": { + "External id": 972852,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 11896 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936075560.235, "dur": 173.071, + "args": { + "External id": 972853,"Record function id": 0, "Ev Idx": 11897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2338708, "tid": 2338708, + "ts": 6345936075653.155, "dur": 67.449, + "args": { + "External id": 972854,"Record function id": 0, "Ev Idx": 11898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936075741.065, "dur": 53.028, + "args": { + "External id": 972855,"Record function id": 0, "Ev Idx": 11899 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936075803.445, "dur": 13238.189, + "args": { + "External id": 972856,"Record function id": 0, "Ev Idx": 11900 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2338708, "tid": 2338708, + "ts": 6345936075814.242, "dur": 1596.771, + "args": { + "External id": 972857,"Record function id": 0, "Ev Idx": 11901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936075935.269, "dur": 7.773, + "args": { + "External id": 972858,"Record function id": 0, "Concrete Inputs": ["[141824512]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936075961.178, "dur": 210.249, + "args": { + "External id": 972859,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 11903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075966.416, "dur": 1.593, + "args": { + "External id": 972860,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075975.325, "dur": 0.487, + "args": { + "External id": 972861,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075976.638, "dur": 0.633, + "args": { + "External id": 972862,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "16384512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075978.419, "dur": 3.097, + "args": { + "External id": 972863,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "18481664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075987.063, "dur": 0.555, + "args": { + "External id": 972864,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19005952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075988.924, "dur": 0.489, + "args": { + "External id": 972865,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "19530240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075990.116, "dur": 2.745, + "args": { + "External id": 972866,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075996.234, "dur": 0.606, + "args": { + "External id": 972867,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "21627904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936075997.596, "dur": 0.548, + "args": { + "External id": 972868,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "28967936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076003.890, "dur": 0.474, + "args": { + "External id": 972869,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "36307968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076005.143, "dur": 0.490, + "args": { + "External id": 972870,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076006.567, "dur": 22.913, + "args": { + "External id": 972871,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "43648512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076034.714, "dur": 0.494, + "args": { + "External id": 972872,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45745664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076036.084, "dur": 0.615, + "args": { + "External id": 972873,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46269952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076037.690, "dur": 2.820, + "args": { + "External id": 972874,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "46794240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076043.726, "dur": 0.451, + "args": { + "External id": 972875,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076045.095, "dur": 0.393, + "args": { + "External id": 972876,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "48891904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076051.447, "dur": 38.021, + "args": { + "External id": 972877,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "56231936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076092.650, "dur": 0.652, + "args": { + "External id": 972878,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "63571968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076094.651, "dur": 2.705, + "args": { + "External id": 972879,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076101.256, "dur": 0.356, + "args": { + "External id": 972880,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "70912512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076102.591, "dur": 0.339, + "args": { + "External id": 972881,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73009664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076103.598, "dur": 2.338, + "args": { + "External id": 972882,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73533952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076109.211, "dur": 0.285, + "args": { + "External id": 972883,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "74058240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076110.172, "dur": 0.384, + "args": { + "External id": 972884,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076116.111, "dur": 0.358, + "args": { + "External id": 972885,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "76155904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076117.196, "dur": 0.339, + "args": { + "External id": 972886,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "83495936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076118.476, "dur": 2.533, + "args": { + "External id": 972887,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "90835968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076124.326, "dur": 0.261, + "args": { + "External id": 972888,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076125.572, "dur": 0.320, + "args": { + "External id": 972889,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "98176512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076126.743, "dur": 2.816, + "args": { + "External id": 972890,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100273664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076132.876, "dur": 0.285, + "args": { + "External id": 972891,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100797952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076133.910, "dur": 0.395, + "args": { + "External id": 972892,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "101322240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076139.619, "dur": 0.459, + "args": { + "External id": 972893,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076140.697, "dur": 0.407, + "args": { + "External id": 972894,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "103419904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076142.046, "dur": 2.661, + "args": { + "External id": 972895,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "110759936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076148.095, "dur": 0.688, + "args": { + "External id": 972896,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "118099968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076149.758, "dur": 0.271, + "args": { + "External id": 972897,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076150.763, "dur": 1.863, + "args": { + "External id": 972898,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "125440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936076201.183, "dur": 157.418, + "args": { + "External id": 972899,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936076436.927, "dur": 359.289, + "args": { + "External id": 972900,"Record function id": 0, "Concrete Inputs": ["", "", "141824512", "8", "2", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 11944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936076455.131, "dur": 7.060, + "args": { + "External id": 972901,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 11945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936076469.475, "dur": 16.233, + "args": { + "External id": 972902,"Record function id": 0, "Concrete Inputs": ["", "0", "283649024", "141824512"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 11946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936076474.655, "dur": 10.584, + "args": { + "External id": 972903,"Record function id": 0, "Concrete Inputs": ["", "0", "283649024", "425473536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[1134596096], [], [], [], []], "Ev Idx": 11947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076481.016, "dur": 0.935, + "args": { + "External id": 972904,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "[1]", "283649024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 11948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936076495.324, "dur": 133.799, + "args": { + "External id": 972905,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 11949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076497.709, "dur": 0.676, + "args": { + "External id": 972906,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "283649024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076500.014, "dur": 0.596, + "args": { + "External id": 972907,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "300033024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076504.076, "dur": 2.847, + "args": { + "External id": 972908,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "300033536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076507.957, "dur": 1.145, + "args": { + "External id": 972909,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "302130688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076512.311, "dur": 0.597, + "args": { + "External id": 972910,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "302654976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076513.924, "dur": 0.537, + "args": { + "External id": 972911,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "303179264"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076515.369, "dur": 0.469, + "args": { + "External id": 972912,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "305276416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076519.383, "dur": 0.463, + "args": { + "External id": 972913,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "305276928"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076520.611, "dur": 1.252, + "args": { + "External id": 972914,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "312616960"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076522.687, "dur": 0.401, + "args": { + "External id": 972915,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "319956992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076526.241, "dur": 2.581, + "args": { + "External id": 972916,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "327297024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076529.937, "dur": 0.388, + "args": { + "External id": 972917,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "327297536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076532.983, "dur": 2.590, + "args": { + "External id": 972918,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "329394688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076536.326, "dur": 0.361, + "args": { + "External id": 972919,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "329918976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076537.206, "dur": 0.475, + "args": { + "External id": 972920,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "330443264"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076542.829, "dur": 0.376, + "args": { + "External id": 972921,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "332540416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076543.840, "dur": 0.374, + "args": { + "External id": 972922,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "332540928"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076544.991, "dur": 0.382, + "args": { + "External id": 972923,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "339880960"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076550.914, "dur": 2.064, + "args": { + "External id": 972924,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "347220992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076553.843, "dur": 0.290, + "args": { + "External id": 972925,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "354561024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076557.135, "dur": 2.697, + "args": { + "External id": 972926,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "354561536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076560.685, "dur": 0.355, + "args": { + "External id": 972927,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "356658688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076561.666, "dur": 0.432, + "args": { + "External id": 972928,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "357182976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076567.652, "dur": 0.333, + "args": { + "External id": 972929,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "357707264"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076568.492, "dur": 0.379, + "args": { + "External id": 972930,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "359804416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076569.912, "dur": 0.487, + "args": { + "External id": 972931,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "359804928"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076576.186, "dur": 2.851, + "args": { + "External id": 972932,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "367144960"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076579.837, "dur": 0.555, + "args": { + "External id": 972933,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "374484992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076583.101, "dur": 2.740, + "args": { + "External id": 972934,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "381825024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076586.714, "dur": 0.563, + "args": { + "External id": 972935,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "381825536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076587.882, "dur": 0.703, + "args": { + "External id": 972936,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "383922688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076594.270, "dur": 0.324, + "args": { + "External id": 972937,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "384446976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076595.241, "dur": 0.436, + "args": { + "External id": 972938,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "384971264"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076596.451, "dur": 0.368, + "args": { + "External id": 972939,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "387068416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076602.969, "dur": 2.495, + "args": { + "External id": 972940,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "387068928"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076606.235, "dur": 0.357, + "args": { + "External id": 972941,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "394408960"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076609.503, "dur": 1.940, + "args": { + "External id": 972942,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "401748992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076612.128, "dur": 0.340, + "args": { + "External id": 972943,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "409089024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936076613.091, "dur": 0.420, + "args": { + "External id": 972944,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "409089536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 11988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936076652.707, "dur": 124.650, + "args": { + "External id": 972945,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 11989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936076865.379, "dur": 410.173, + "args": { + "External id": 972946,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[1134596096], [141824512], [], [], []], "Ev Idx": 11990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936076908.586, "dur": 360.113, + "args": { + "External id": 972947,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1134596096, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[141824512], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 11991, "In msg nelems": 141824512 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936076920.554, "dur": 340.783, + "args": { + "External id": 972948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[141824512]], "Ev Idx": 11992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936077308.606, "dur": 2.820, + "args": { + "External id": 972949,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 11993, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2338708, "tid": 2338708, + "ts": 6345936077429.569, "dur": 11379.128, + "args": { + "External id": 972950,"Record function id": 0, "Ev Idx": 11994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077631.671, "dur": 8.056, + "args": { + "External id": 972951,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 11995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077644.165, "dur": 1.384, + "args": { + "External id": 972952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 11996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077647.454, "dur": 1.123, + "args": { + "External id": 972953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 11997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077651.288, "dur": 3.672, + "args": { + "External id": 972954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 11998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077656.667, "dur": 0.913, + "args": { + "External id": 972955,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 11999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077659.353, "dur": 1.154, + "args": { + "External id": 972956,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077664.637, "dur": 1.167, + "args": { + "External id": 972957,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077667.168, "dur": 2.340, + "args": { + "External id": 972958,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077673.395, "dur": 0.927, + "args": { + "External id": 972959,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077676.098, "dur": 0.925, + "args": { + "External id": 972960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077681.348, "dur": 0.597, + "args": { + "External id": 972961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077683.470, "dur": 3.391, + "args": { + "External id": 972962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077688.354, "dur": 0.962, + "args": { + "External id": 972963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077690.793, "dur": 0.769, + "args": { + "External id": 972964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077695.165, "dur": 0.928, + "args": { + "External id": 972965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077697.350, "dur": 2.442, + "args": { + "External id": 972966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077704.602, "dur": 0.905, + "args": { + "External id": 972967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077706.917, "dur": 0.685, + "args": { + "External id": 972968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077711.312, "dur": 0.814, + "args": { + "External id": 972969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077713.690, "dur": 2.905, + "args": { + "External id": 972970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077718.075, "dur": 0.821, + "args": { + "External id": 972971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077720.401, "dur": 1.452, + "args": { + "External id": 972972,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077725.441, "dur": 0.799, + "args": { + "External id": 972973,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077727.638, "dur": 2.605, + "args": { + "External id": 972974,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077753.707, "dur": 0.840, + "args": { + "External id": 972975,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077756.288, "dur": 0.807, + "args": { + "External id": 972976,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077761.038, "dur": 0.943, + "args": { + "External id": 972977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077763.272, "dur": 3.180, + "args": { + "External id": 972978,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077767.934, "dur": 0.957, + "args": { + "External id": 972979,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077770.228, "dur": 0.727, + "args": { + "External id": 972980,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077774.775, "dur": 0.968, + "args": { + "External id": 972981,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077777.155, "dur": 2.453, + "args": { + "External id": 972982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077783.937, "dur": 0.842, + "args": { + "External id": 972983,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077786.240, "dur": 0.823, + "args": { + "External id": 972984,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077791.021, "dur": 0.724, + "args": { + "External id": 972985,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077793.134, "dur": 2.968, + "args": { + "External id": 972986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077797.510, "dur": 0.939, + "args": { + "External id": 972987,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077799.843, "dur": 0.910, + "args": { + "External id": 972988,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077805.010, "dur": 1.045, + "args": { + "External id": 972989,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936077807.364, "dur": 2.583, + "args": { + "External id": 972990,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 12034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936077844.035, "dur": 10897.384, + "args": { + "External id": 972991,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 12035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936077872.117, "dur": 10857.380, + "args": { + "External id": 972992,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 12036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936077898.128, "dur": 6.064, + "args": { + "External id": 972993,"Record function id": 0, "Concrete Inputs": ["[4384]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936077911.267, "dur": 10767.970, + "args": { + "External id": 972994,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], [], []], "Ev Idx": 12038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936077916.982, "dur": 10761.179, + "args": { + "External id": 972995,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], []], "Ev Idx": 12039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936077926.294, "dur": 6.868, + "args": { + "External id": 972996,"Record function id": 0, "Concrete Inputs": ["[4384]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936077935.148, "dur": 10738.899, + "args": { + "External id": 972997,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4384], [4384], []], "Ev Idx": 12041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936089185.429, "dur": 42.408, + "args": { + "External id": 972998,"Record function id": 0, "Ev Idx": 12042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2338708, "tid": 2338708, + "ts": 6345936089229.585, "dur": 247.431, + "args": { + "External id": 972999,"Record function id": 0, "Ev Idx": 12043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936089278.297, "dur": 187.524, + "args": { + "External id": 973000,"Sequence number": 10552242, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32000, 4096], [8, 4096]], "Ev Idx": 12044 + } + }, + { + "ph": "s", "id": 224, "pid": 2338708, "tid": 2338708, "ts": 6345936089278.297, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936089361.844, "dur": 60.141, + "args": { + "External id": 973001,"kernel_hash": "cljo2nzima3hpaovvfppftdgufxpb4dtilebb6n5aksulaywtrgm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/lj/cljo2nzima3hpaovvfppftdgufxpb4dtilebb6n5aksulaywtrgm.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096], [32000, 4096], [8, 4096, 4096], []], "Ev Idx": 12045 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936089548.620, "dur": 65.405, + "args": { + "External id": 973002,"Record function id": 0, "Ev Idx": 12046 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2338708, "tid": 2338708, + "ts": 6345936089626.291, "dur": 9301.843, + "args": { + "External id": 973003,"Record function id": 0, "Ev Idx": 12047 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338708, "tid": 2338708, + "ts": 6345936089635.882, "dur": 1062.855, + "args": { + "External id": 973004,"Record function id": 0, "Ev Idx": 12048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936089716.919, "dur": 13.058, + "args": { + "External id": 973005,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936089744.723, "dur": 47.810, + "args": { + "External id": 973006,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089755.711, "dur": 2.987, + "args": { + "External id": 973007,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089760.625, "dur": 0.445, + "args": { + "External id": 973008,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089761.926, "dur": 2.554, + "args": { + "External id": 973009,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089767.772, "dur": 0.578, + "args": { + "External id": 973010,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089769.241, "dur": 0.443, + "args": { + "External id": 973011,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089775.679, "dur": 0.423, + "args": { + "External id": 973012,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089777.009, "dur": 0.650, + "args": { + "External id": 973013,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089778.294, "dur": 3.288, + "args": { + "External id": 973014,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089785.166, "dur": 0.712, + "args": { + "External id": 973015,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936089804.443, "dur": 56.931, + "args": { + "External id": 973016,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936089898.103, "dur": 238.063, + "args": { + "External id": 973017,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936089910.982, "dur": 5.900, + "args": { + "External id": 973018,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936089923.210, "dur": 13.679, + "args": { + "External id": 973019,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936089928.422, "dur": 7.917, + "args": { + "External id": 973020,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089934.190, "dur": 0.673, + "args": { + "External id": 973021,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936089943.813, "dur": 35.524, + "args": { + "External id": 973022,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089946.033, "dur": 0.699, + "args": { + "External id": 973023,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089950.777, "dur": 0.637, + "args": { + "External id": 973024,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089954.630, "dur": 0.377, + "args": { + "External id": 973025,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089958.271, "dur": 0.701, + "args": { + "External id": 973026,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089959.921, "dur": 4.793, + "args": { + "External id": 973027,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089965.418, "dur": 0.380, + "args": { + "External id": 973028,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089969.532, "dur": 0.274, + "args": { + "External id": 973029,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089973.001, "dur": 0.394, + "args": { + "External id": 973030,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936089974.185, "dur": 0.287, + "args": { + "External id": 973031,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936089993.127, "dur": 128.740, + "args": { + "External id": 973032,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936090201.501, "dur": 388.736, + "args": { + "External id": 973033,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936090238.178, "dur": 346.285, + "args": { + "External id": 973034,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12078, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936090248.886, "dur": 329.213, + "args": { + "External id": 973035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936090614.947, "dur": 2.783, + "args": { + "External id": 973036,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12080, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338708, "tid": 2338708, + "ts": 6345936090722.348, "dur": 7972.876, + "args": { + "External id": 973037,"Record function id": 0, "Ev Idx": 12081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936090832.255, "dur": 7.437, + "args": { + "External id": 973038,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936090843.771, "dur": 1.350, + "args": { + "External id": 973039,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936090846.918, "dur": 1.369, + "args": { + "External id": 973040,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936090850.091, "dur": 3.081, + "args": { + "External id": 973041,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936090857.247, "dur": 0.933, + "args": { + "External id": 973042,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936090859.719, "dur": 0.986, + "args": { + "External id": 973043,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936090862.442, "dur": 0.973, + "args": { + "External id": 973044,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936090867.340, "dur": 2.951, + "args": { + "External id": 973045,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936090874.538, "dur": 1.000, + "args": { + "External id": 973046,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936090877.131, "dur": 0.941, + "args": { + "External id": 973047,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936090898.704, "dur": 7746.468, + "args": { + "External id": 973048,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936090917.767, "dur": 7718.596, + "args": { + "External id": 973049,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936090945.807, "dur": 16.982, + "args": { + "External id": 973050,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936090966.744, "dur": 7629.247, + "args": { + "External id": 973051,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936090972.115, "dur": 7623.033, + "args": { + "External id": 973052,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936090978.832, "dur": 8.361, + "args": { + "External id": 973053,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936090989.253, "dur": 7602.150, + "args": { + "External id": 973054,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936098860.931, "dur": 36.947, + "args": { + "External id": 973055,"Sequence number": 10552243, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12099 + } + }, + { + "ph": "s", "id": 223, "pid": 2338708, "tid": 2338708, "ts": 6345936098860.931, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936098880.744, "dur": 11.869, + "args": { + "External id": 973056,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936098887.500, "dur": 4.861, + "args": { + "External id": 973057,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936098977.759, "dur": 171.340, + "args": { + "External id": 973058,"Record function id": 0, "Ev Idx": 12102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936099152.546, "dur": 1332.362, + "args": { + "External id": 973059,"Record function id": 0, "Ev Idx": 12103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936099202.300, "dur": 1265.377, + "args": { + "External id": 973060,"Sequence number": 10552244, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12104 + } + }, + { + "ph": "s", "id": 222, "pid": 2338708, "tid": 2338708, "ts": 6345936099202.300, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936099284.853, "dur": 60.348, + "args": { + "External id": 973061,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936099361.437, "dur": 120.004, + "args": { + "External id": 973062,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936099494.555, "dur": 42.953, + "args": { + "External id": 973063,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936099547.941, "dur": 34.342, + "args": { + "External id": 973064,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936099613.422, "dur": 31.980, + "args": { + "External id": 973065,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936099674.061, "dur": 23.019, + "args": { + "External id": 973066,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936099725.008, "dur": 154.664, + "args": { + "External id": 973067,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936099788.462, "dur": 14.451, + "args": { + "External id": 973068,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936099794.417, "dur": 7.403, + "args": { + "External id": 973069,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936099805.800, "dur": 4.349, + "args": { + "External id": 973070,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936099814.068, "dur": 1.201, + "args": { + "External id": 973071,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936099818.689, "dur": 3.319, + "args": { + "External id": 973072,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936099891.706, "dur": 55.272, + "args": { + "External id": 973073,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936099988.055, "dur": 58.307, + "args": { + "External id": 973074,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936100103.813, "dur": 57.051, + "args": { + "External id": 973075,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936100174.579, "dur": 39.720, + "args": { + "External id": 973076,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936100242.199, "dur": 29.031, + "args": { + "External id": 973077,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936100280.933, "dur": 42.771, + "args": { + "External id": 973078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936100352.007, "dur": 23.440, + "args": { + "External id": 973079,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12123 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2338708, "tid": 2338708, + "ts": 6345936100566.396, "dur": 101.430, + "args": { + "External id": 973080,"Record function id": 0, "Ev Idx": 12124 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936100754.983, "dur": 55.540, + "args": { + "External id": 973081,"Record function id": 0, "Ev Idx": 12125 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2338708, "tid": 2338708, + "ts": 6345936100820.908, "dur": 26932.954, + "args": { + "External id": 973082,"Record function id": 0, "Ev Idx": 12126 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338708, "tid": 2338708, + "ts": 6345936100832.400, "dur": 1283.427, + "args": { + "External id": 973083,"Record function id": 0, "Ev Idx": 12127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936100922.775, "dur": 9.836, + "args": { + "External id": 973084,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936100948.152, "dur": 50.669, + "args": { + "External id": 973085,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936100957.541, "dur": 4.816, + "args": { + "External id": 973086,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936100964.176, "dur": 0.611, + "args": { + "External id": 973087,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936100968.569, "dur": 0.655, + "args": { + "External id": 973088,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936100972.718, "dur": 0.380, + "args": { + "External id": 973089,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936100973.932, "dur": 2.941, + "args": { + "External id": 973090,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936100980.521, "dur": 0.480, + "args": { + "External id": 973091,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936100983.033, "dur": 0.553, + "args": { + "External id": 973092,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936100984.396, "dur": 0.650, + "args": { + "External id": 973093,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936100988.373, "dur": 2.524, + "args": { + "External id": 973094,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936101036.071, "dur": 118.989, + "args": { + "External id": 973095,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936101199.451, "dur": 163.497, + "args": { + "External id": 973096,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936101219.027, "dur": 6.821, + "args": { + "External id": 973097,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936101232.206, "dur": 13.341, + "args": { + "External id": 973098,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936101237.702, "dur": 7.338, + "args": { + "External id": 973099,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936101241.497, "dur": 1.359, + "args": { + "External id": 973100,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936101253.973, "dur": 39.681, + "args": { + "External id": 973101,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936101258.617, "dur": 0.855, + "args": { + "External id": 973102,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936101260.900, "dur": 3.606, + "args": { + "External id": 973103,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936101265.214, "dur": 3.147, + "args": { + "External id": 973104,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936101271.971, "dur": 0.621, + "args": { + "External id": 973105,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936101273.241, "dur": 0.590, + "args": { + "External id": 973106,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936101276.774, "dur": 0.405, + "args": { + "External id": 973107,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936101280.157, "dur": 0.344, + "args": { + "External id": 973108,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936101281.547, "dur": 0.382, + "args": { + "External id": 973109,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936101288.072, "dur": 0.648, + "args": { + "External id": 973110,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936101308.380, "dur": 42.631, + "args": { + "External id": 973111,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936101431.837, "dur": 512.199, + "args": { + "External id": 973112,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936101471.734, "dur": 465.932, + "args": { + "External id": 973113,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12157, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936101483.157, "dur": 447.462, + "args": { + "External id": 973114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936101968.908, "dur": 3.201, + "args": { + "External id": 973115,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12159, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338708, "tid": 2338708, + "ts": 6345936102144.036, "dur": 25385.421, + "args": { + "External id": 973116,"Record function id": 0, "Ev Idx": 12160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936102261.878, "dur": 8.102, + "args": { + "External id": 973117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936102274.403, "dur": 1.518, + "args": { + "External id": 973118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936102277.794, "dur": 1.323, + "args": { + "External id": 973119,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936102280.763, "dur": 1.267, + "args": { + "External id": 973120,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936102283.547, "dur": 0.954, + "args": { + "External id": 973121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936102288.609, "dur": 1.350, + "args": { + "External id": 973122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936102293.897, "dur": 0.959, + "args": { + "External id": 973123,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936102296.237, "dur": 5.466, + "args": { + "External id": 973124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936102303.443, "dur": 1.076, + "args": { + "External id": 973125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936102308.347, "dur": 1.043, + "args": { + "External id": 973126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936102329.816, "dur": 25150.767, + "args": { + "External id": 973127,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936102349.841, "dur": 25121.949, + "args": { + "External id": 973128,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936102385.497, "dur": 17.968, + "args": { + "External id": 973129,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936102407.897, "dur": 25021.230, + "args": { + "External id": 973130,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936102411.467, "dur": 25016.914, + "args": { + "External id": 973131,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936102417.640, "dur": 7.270, + "args": { + "External id": 973132,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936102426.977, "dur": 24997.796, + "args": { + "External id": 973133,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936127687.129, "dur": 35.749, + "args": { + "External id": 973134,"Sequence number": 10552245, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12178 + } + }, + { + "ph": "s", "id": 221, "pid": 2338708, "tid": 2338708, "ts": 6345936127687.129, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936127705.718, "dur": 11.743, + "args": { + "External id": 973135,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936127711.881, "dur": 5.247, + "args": { + "External id": 973136,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936127800.030, "dur": 86.105, + "args": { + "External id": 973137,"Record function id": 0, "Ev Idx": 12181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936127887.990, "dur": 1382.763, + "args": { + "External id": 973138,"Record function id": 0, "Ev Idx": 12182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936127929.374, "dur": 1324.198, + "args": { + "External id": 973139,"Sequence number": 10552246, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12183 + } + }, + { + "ph": "s", "id": 220, "pid": 2338708, "tid": 2338708, "ts": 6345936127929.374, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936128037.686, "dur": 112.428, + "args": { + "External id": 973140,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936128172.647, "dur": 116.383, + "args": { + "External id": 973141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936128301.417, "dur": 41.426, + "args": { + "External id": 973142,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936128349.567, "dur": 32.243, + "args": { + "External id": 973143,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936128415.142, "dur": 27.913, + "args": { + "External id": 973144,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936128468.666, "dur": 23.268, + "args": { + "External id": 973145,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936128514.183, "dur": 159.007, + "args": { + "External id": 973146,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936128577.011, "dur": 13.320, + "args": { + "External id": 973147,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936128582.877, "dur": 6.295, + "args": { + "External id": 973148,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936128595.751, "dur": 6.821, + "args": { + "External id": 973149,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936128603.896, "dur": 3.624, + "args": { + "External id": 973150,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936128610.440, "dur": 3.399, + "args": { + "External id": 973151,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936128686.089, "dur": 51.827, + "args": { + "External id": 973152,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936128774.134, "dur": 33.183, + "args": { + "External id": 973153,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936128818.134, "dur": 47.617, + "args": { + "External id": 973154,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936128873.957, "dur": 40.364, + "args": { + "External id": 973155,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936128940.263, "dur": 31.795, + "args": { + "External id": 973156,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936128978.985, "dur": 135.033, + "args": { + "External id": 973157,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936129144.197, "dur": 28.050, + "args": { + "External id": 973158,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12202 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2338708, "tid": 2338708, + "ts": 6345936129349.226, "dur": 94.370, + "args": { + "External id": 973159,"Record function id": 0, "Ev Idx": 12203 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936129529.631, "dur": 54.462, + "args": { + "External id": 973160,"Record function id": 0, "Ev Idx": 12204 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2338708, "tid": 2338708, + "ts": 6345936129594.420, "dur": 29613.142, + "args": { + "External id": 973161,"Record function id": 0, "Ev Idx": 12205 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338708, "tid": 2338708, + "ts": 6345936129606.700, "dur": 1097.969, + "args": { + "External id": 973162,"Record function id": 0, "Ev Idx": 12206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936129699.000, "dur": 9.447, + "args": { + "External id": 973163,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936129725.193, "dur": 48.021, + "args": { + "External id": 973164,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129731.652, "dur": 2.721, + "args": { + "External id": 973165,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129738.821, "dur": 0.501, + "args": { + "External id": 973166,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129743.398, "dur": 0.569, + "args": { + "External id": 973167,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129745.009, "dur": 0.695, + "args": { + "External id": 973168,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129749.424, "dur": 0.504, + "args": { + "External id": 973169,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129753.388, "dur": 0.679, + "args": { + "External id": 973170,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129754.968, "dur": 5.134, + "args": { + "External id": 973171,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129763.642, "dur": 0.537, + "args": { + "External id": 973172,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129765.147, "dur": 0.637, + "args": { + "External id": 973173,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936129785.219, "dur": 61.855, + "args": { + "External id": 973174,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936129887.465, "dur": 221.917, + "args": { + "External id": 973175,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936129901.609, "dur": 4.836, + "args": { + "External id": 973176,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936129912.509, "dur": 11.845, + "args": { + "External id": 973177,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936129917.815, "dur": 6.059, + "args": { + "External id": 973178,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129921.541, "dur": 0.873, + "args": { + "External id": 973179,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936129932.467, "dur": 43.839, + "args": { + "External id": 973180,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129937.478, "dur": 3.370, + "args": { + "External id": 973181,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129942.150, "dur": 0.647, + "args": { + "External id": 973182,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129943.831, "dur": 0.813, + "args": { + "External id": 973183,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129950.549, "dur": 3.046, + "args": { + "External id": 973184,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129954.691, "dur": 0.336, + "args": { + "External id": 973185,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129958.596, "dur": 0.558, + "args": { + "External id": 973186,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129962.467, "dur": 0.363, + "args": { + "External id": 973187,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129963.555, "dur": 0.523, + "args": { + "External id": 973188,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936129967.728, "dur": 2.967, + "args": { + "External id": 973189,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936129989.041, "dur": 108.270, + "args": { + "External id": 973190,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936130178.409, "dur": 415.012, + "args": { + "External id": 973191,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936130217.888, "dur": 370.202, + "args": { + "External id": 973192,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12236, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936130231.122, "dur": 350.956, + "args": { + "External id": 973193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936130621.443, "dur": 2.864, + "args": { + "External id": 973194,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12238, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338708, "tid": 2338708, + "ts": 6345936130729.291, "dur": 28195.352, + "args": { + "External id": 973195,"Record function id": 0, "Ev Idx": 12239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936130844.023, "dur": 7.365, + "args": { + "External id": 973196,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936130855.491, "dur": 1.936, + "args": { + "External id": 973197,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936130859.563, "dur": 3.414, + "args": { + "External id": 973198,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936130865.258, "dur": 0.933, + "args": { + "External id": 973199,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936130867.758, "dur": 0.980, + "args": { + "External id": 973200,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936130870.518, "dur": 1.287, + "args": { + "External id": 973201,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936130876.434, "dur": 1.219, + "args": { + "External id": 973202,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936130879.710, "dur": 2.959, + "args": { + "External id": 973203,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936130884.248, "dur": 0.900, + "args": { + "External id": 973204,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936130886.787, "dur": 0.837, + "args": { + "External id": 973205,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936130912.465, "dur": 27961.585, + "args": { + "External id": 973206,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936130929.656, "dur": 27934.936, + "args": { + "External id": 973207,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936130951.993, "dur": 19.455, + "args": { + "External id": 973208,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936130975.638, "dur": 27848.263, + "args": { + "External id": 973209,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936130978.547, "dur": 27844.552, + "args": { + "External id": 973210,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936130986.054, "dur": 6.100, + "args": { + "External id": 973211,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936130993.990, "dur": 27825.113, + "args": { + "External id": 973212,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936159138.104, "dur": 38.995, + "args": { + "External id": 973213,"Sequence number": 10552247, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12257 + } + }, + { + "ph": "s", "id": 219, "pid": 2338708, "tid": 2338708, "ts": 6345936159138.104, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936159161.258, "dur": 10.132, + "args": { + "External id": 973214,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936159165.439, "dur": 5.581, + "args": { + "External id": 973215,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936159255.886, "dur": 82.399, + "args": { + "External id": 973216,"Record function id": 0, "Ev Idx": 12260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936159339.584, "dur": 1304.377, + "args": { + "External id": 973217,"Record function id": 0, "Ev Idx": 12261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936159385.890, "dur": 1241.980, + "args": { + "External id": 973218,"Sequence number": 10552248, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12262 + } + }, + { + "ph": "s", "id": 218, "pid": 2338708, "tid": 2338708, "ts": 6345936159385.890, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936159465.883, "dur": 57.508, + "args": { + "External id": 973219,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936159538.310, "dur": 118.538, + "args": { + "External id": 973220,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936159670.865, "dur": 41.397, + "args": { + "External id": 973221,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936159723.265, "dur": 32.904, + "args": { + "External id": 973222,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936159785.610, "dur": 28.840, + "args": { + "External id": 973223,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936159837.111, "dur": 22.118, + "args": { + "External id": 973224,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936159885.133, "dur": 242.340, + "args": { + "External id": 973225,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936159942.840, "dur": 14.556, + "args": { + "External id": 973226,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936159950.555, "dur": 5.900, + "args": { + "External id": 973227,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936159960.512, "dur": 5.824, + "args": { + "External id": 973228,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936159967.987, "dur": 1.369, + "args": { + "External id": 973229,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936159972.020, "dur": 6.316, + "args": { + "External id": 973230,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936160143.693, "dur": 68.260, + "args": { + "External id": 973231,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936160252.455, "dur": 35.155, + "args": { + "External id": 973232,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936160298.460, "dur": 47.766, + "args": { + "External id": 973233,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936160356.059, "dur": 39.920, + "args": { + "External id": 973234,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936160421.274, "dur": 28.252, + "args": { + "External id": 973235,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936160458.527, "dur": 39.445, + "args": { + "External id": 973236,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936160522.428, "dur": 20.347, + "args": { + "External id": 973237,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12281 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2338708, "tid": 2338708, + "ts": 6345936160722.604, "dur": 88.501, + "args": { + "External id": 973238,"Record function id": 0, "Ev Idx": 12282 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936160898.203, "dur": 52.806, + "args": { + "External id": 973239,"Record function id": 0, "Ev Idx": 12283 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2338708, "tid": 2338708, + "ts": 6345936160961.708, "dur": 28218.404, + "args": { + "External id": 973240,"Record function id": 0, "Ev Idx": 12284 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338708, "tid": 2338708, + "ts": 6345936160973.769, "dur": 1193.473, + "args": { + "External id": 973241,"Record function id": 0, "Ev Idx": 12285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936161133.817, "dur": 11.458, + "args": { + "External id": 973242,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936161161.263, "dur": 51.982, + "args": { + "External id": 973243,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161167.971, "dur": 2.783, + "args": { + "External id": 973244,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161179.030, "dur": 0.433, + "args": { + "External id": 973245,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161180.333, "dur": 0.702, + "args": { + "External id": 973246,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161187.833, "dur": 0.536, + "args": { + "External id": 973247,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161193.330, "dur": 0.608, + "args": { + "External id": 973248,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161194.757, "dur": 0.539, + "args": { + "External id": 973249,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161198.376, "dur": 4.654, + "args": { + "External id": 973250,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161203.976, "dur": 0.443, + "args": { + "External id": 973251,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161204.938, "dur": 0.337, + "args": { + "External id": 973252,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936161228.126, "dur": 66.167, + "args": { + "External id": 973253,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936161335.465, "dur": 158.019, + "args": { + "External id": 973254,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936161350.143, "dur": 4.708, + "args": { + "External id": 973255,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936161360.974, "dur": 11.751, + "args": { + "External id": 973256,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936161366.738, "dur": 5.525, + "args": { + "External id": 973257,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161370.131, "dur": 0.623, + "args": { + "External id": 973258,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936161382.919, "dur": 40.996, + "args": { + "External id": 973259,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161384.807, "dur": 3.098, + "args": { + "External id": 973260,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161389.051, "dur": 0.512, + "args": { + "External id": 973261,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161394.429, "dur": 0.733, + "args": { + "External id": 973262,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161398.653, "dur": 2.808, + "args": { + "External id": 973263,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161404.283, "dur": 0.611, + "args": { + "External id": 973264,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161406.140, "dur": 0.401, + "args": { + "External id": 973265,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161409.491, "dur": 0.365, + "args": { + "External id": 973266,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161413.615, "dur": 0.635, + "args": { + "External id": 973267,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936161415.450, "dur": 2.610, + "args": { + "External id": 973268,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936161445.863, "dur": 38.277, + "args": { + "External id": 973269,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936161560.727, "dur": 419.385, + "args": { + "External id": 973270,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936161601.063, "dur": 373.757, + "args": { + "External id": 973271,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12315, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936161617.208, "dur": 350.984, + "args": { + "External id": 973272,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936162006.908, "dur": 29.129, + "args": { + "External id": 973273,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12317, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338708, "tid": 2338708, + "ts": 6345936162193.686, "dur": 26708.165, + "args": { + "External id": 973274,"Record function id": 0, "Ev Idx": 12318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936162314.157, "dur": 8.003, + "args": { + "External id": 973275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936162326.315, "dur": 1.085, + "args": { + "External id": 973276,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936162329.721, "dur": 4.019, + "args": { + "External id": 973277,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936162335.872, "dur": 1.463, + "args": { + "External id": 973278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936162339.186, "dur": 1.154, + "args": { + "External id": 973279,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936162342.018, "dur": 0.870, + "args": { + "External id": 973280,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936162347.253, "dur": 0.941, + "args": { + "External id": 973281,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936162349.979, "dur": 3.225, + "args": { + "External id": 973282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936162354.908, "dur": 0.728, + "args": { + "External id": 973283,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936162357.005, "dur": 0.903, + "args": { + "External id": 973284,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936162382.960, "dur": 26466.973, + "args": { + "External id": 973285,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936162401.791, "dur": 26439.167, + "args": { + "External id": 973286,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936162425.554, "dur": 19.413, + "args": { + "External id": 973287,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936162448.531, "dur": 26350.911, + "args": { + "External id": 973288,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936162451.751, "dur": 26346.778, + "args": { + "External id": 973289,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936162460.279, "dur": 7.221, + "args": { + "External id": 973290,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936162469.832, "dur": 26324.748, + "args": { + "External id": 973291,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936189101.476, "dur": 44.976, + "args": { + "External id": 973292,"Sequence number": 10552249, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12336 + } + }, + { + "ph": "s", "id": 217, "pid": 2338708, "tid": 2338708, "ts": 6345936189101.476, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936189129.643, "dur": 11.114, + "args": { + "External id": 973293,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936189134.364, "dur": 5.963, + "args": { + "External id": 973294,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936189228.310, "dur": 83.083, + "args": { + "External id": 973295,"Record function id": 0, "Ev Idx": 12339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936189315.379, "dur": 1294.821, + "args": { + "External id": 973296,"Record function id": 0, "Ev Idx": 12340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936189359.722, "dur": 1233.468, + "args": { + "External id": 973297,"Sequence number": 10552250, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12341 + } + }, + { + "ph": "s", "id": 216, "pid": 2338708, "tid": 2338708, "ts": 6345936189359.722, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936189433.448, "dur": 56.622, + "args": { + "External id": 973298,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936189504.132, "dur": 117.642, + "args": { + "External id": 973299,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936189637.941, "dur": 41.776, + "args": { + "External id": 973300,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936189687.009, "dur": 34.220, + "args": { + "External id": 973301,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936189753.813, "dur": 30.134, + "args": { + "External id": 973302,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936189805.351, "dur": 19.819, + "args": { + "External id": 973303,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936189849.843, "dur": 154.499, + "args": { + "External id": 973304,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936189913.787, "dur": 12.638, + "args": { + "External id": 973305,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936189919.632, "dur": 5.776, + "args": { + "External id": 973306,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936189929.444, "dur": 4.721, + "args": { + "External id": 973307,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936189935.771, "dur": 0.965, + "args": { + "External id": 973308,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936189942.415, "dur": 5.557, + "args": { + "External id": 973309,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936190040.695, "dur": 103.196, + "args": { + "External id": 973310,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936190190.117, "dur": 36.400, + "args": { + "External id": 973311,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936190239.038, "dur": 49.628, + "args": { + "External id": 973312,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936190319.750, "dur": 40.122, + "args": { + "External id": 973313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936190391.961, "dur": 27.589, + "args": { + "External id": 973314,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936190426.957, "dur": 38.930, + "args": { + "External id": 973315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936190485.155, "dur": 22.872, + "args": { + "External id": 973316,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12360 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2338708, "tid": 2338708, + "ts": 6345936190691.258, "dur": 93.318, + "args": { + "External id": 973317,"Record function id": 0, "Ev Idx": 12361 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936190871.295, "dur": 53.089, + "args": { + "External id": 973318,"Record function id": 0, "Ev Idx": 12362 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2338708, "tid": 2338708, + "ts": 6345936190934.398, "dur": 29240.057, + "args": { + "External id": 973319,"Record function id": 0, "Ev Idx": 12363 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338708, "tid": 2338708, + "ts": 6345936190946.653, "dur": 1143.459, + "args": { + "External id": 973320,"Record function id": 0, "Ev Idx": 12364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936191100.263, "dur": 12.286, + "args": { + "External id": 973321,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936191130.386, "dur": 49.448, + "args": { + "External id": 973322,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191139.377, "dur": 2.690, + "args": { + "External id": 973323,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191146.605, "dur": 0.499, + "args": { + "External id": 973324,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191148.287, "dur": 0.448, + "args": { + "External id": 973325,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191151.783, "dur": 0.659, + "args": { + "External id": 973326,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191155.687, "dur": 0.411, + "args": { + "External id": 973327,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191159.632, "dur": 0.425, + "args": { + "External id": 973328,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191161.000, "dur": 5.365, + "args": { + "External id": 973329,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191167.029, "dur": 0.478, + "args": { + "External id": 973330,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191170.969, "dur": 0.638, + "args": { + "External id": 973331,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936191193.608, "dur": 67.972, + "args": { + "External id": 973332,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936191302.582, "dur": 146.342, + "args": { + "External id": 973333,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936191317.221, "dur": 4.479, + "args": { + "External id": 973334,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936191327.716, "dur": 14.484, + "args": { + "External id": 973335,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936191333.188, "dur": 8.554, + "args": { + "External id": 973336,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191339.387, "dur": 0.949, + "args": { + "External id": 973337,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936191349.456, "dur": 37.625, + "args": { + "External id": 973338,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191350.932, "dur": 3.358, + "args": { + "External id": 973339,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191357.833, "dur": 0.484, + "args": { + "External id": 973340,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191359.116, "dur": 0.659, + "args": { + "External id": 973341,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191365.797, "dur": 3.064, + "args": { + "External id": 973342,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191369.616, "dur": 0.371, + "args": { + "External id": 973343,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191370.521, "dur": 0.656, + "args": { + "External id": 973344,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191376.477, "dur": 0.355, + "args": { + "External id": 973345,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191377.749, "dur": 0.572, + "args": { + "External id": 973346,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936191379.005, "dur": 2.689, + "args": { + "External id": 973347,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936191400.339, "dur": 38.529, + "args": { + "External id": 973348,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936191514.357, "dur": 412.776, + "args": { + "External id": 973349,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936191556.279, "dur": 365.207, + "args": { + "External id": 973350,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12394, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936191568.021, "dur": 346.384, + "args": { + "External id": 973351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936191950.958, "dur": 3.088, + "args": { + "External id": 973352,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12396, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338708, "tid": 2338708, + "ts": 6345936192119.507, "dur": 27793.275, + "args": { + "External id": 973353,"Record function id": 0, "Ev Idx": 12397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936192241.410, "dur": 8.028, + "args": { + "External id": 973354,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936192253.353, "dur": 1.355, + "args": { + "External id": 973355,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936192256.372, "dur": 3.239, + "args": { + "External id": 973356,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936192261.708, "dur": 1.273, + "args": { + "External id": 973357,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936192264.532, "dur": 1.102, + "args": { + "External id": 973358,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936192267.104, "dur": 1.230, + "args": { + "External id": 973359,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936192272.639, "dur": 1.024, + "args": { + "External id": 973360,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936192277.690, "dur": 2.456, + "args": { + "External id": 973361,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936192281.543, "dur": 0.874, + "args": { + "External id": 973362,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936192283.863, "dur": 0.619, + "args": { + "External id": 973363,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936192307.285, "dur": 27558.113, + "args": { + "External id": 973364,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936192327.360, "dur": 27529.059, + "args": { + "External id": 973365,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936192349.503, "dur": 19.976, + "args": { + "External id": 973366,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936192373.313, "dur": 27443.130, + "args": { + "External id": 973367,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936192376.503, "dur": 27439.310, + "args": { + "External id": 973368,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936192383.481, "dur": 6.074, + "args": { + "External id": 973369,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936192391.408, "dur": 27420.861, + "args": { + "External id": 973370,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936220105.939, "dur": 38.078, + "args": { + "External id": 973371,"Sequence number": 10552251, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12415 + } + }, + { + "ph": "s", "id": 215, "pid": 2338708, "tid": 2338708, "ts": 6345936220105.939, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936220125.807, "dur": 12.649, + "args": { + "External id": 973372,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936220132.724, "dur": 5.328, + "args": { + "External id": 973373,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936220220.924, "dur": 83.325, + "args": { + "External id": 973374,"Record function id": 0, "Ev Idx": 12418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936220305.749, "dur": 1272.839, + "args": { + "External id": 973375,"Record function id": 0, "Ev Idx": 12419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936220350.345, "dur": 1211.739, + "args": { + "External id": 973376,"Sequence number": 10552252, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12420 + } + }, + { + "ph": "s", "id": 214, "pid": 2338708, "tid": 2338708, "ts": 6345936220350.345, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936220427.245, "dur": 57.519, + "args": { + "External id": 973377,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936220497.609, "dur": 118.436, + "args": { + "External id": 973378,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936220628.652, "dur": 41.393, + "args": { + "External id": 973379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936220679.743, "dur": 34.314, + "args": { + "External id": 973380,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936220744.208, "dur": 32.009, + "args": { + "External id": 973381,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936220800.827, "dur": 20.044, + "args": { + "External id": 973382,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936220845.094, "dur": 152.815, + "args": { + "External id": 973383,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936220905.192, "dur": 12.291, + "args": { + "External id": 973384,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936220910.569, "dur": 5.957, + "args": { + "External id": 973385,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936220920.446, "dur": 5.382, + "args": { + "External id": 973386,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936220929.430, "dur": 1.035, + "args": { + "External id": 973387,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936220933.126, "dur": 6.540, + "args": { + "External id": 973388,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936221031.525, "dur": 109.667, + "args": { + "External id": 973389,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936221184.883, "dur": 35.707, + "args": { + "External id": 973390,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936221232.332, "dur": 50.155, + "args": { + "External id": 973391,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936221291.764, "dur": 39.530, + "args": { + "External id": 973392,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936221358.385, "dur": 27.289, + "args": { + "External id": 973393,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936221394.670, "dur": 39.916, + "args": { + "External id": 973394,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936221455.104, "dur": 19.987, + "args": { + "External id": 973395,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12439 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2338708, "tid": 2338708, + "ts": 6345936221657.815, "dur": 90.240, + "args": { + "External id": 973396,"Record function id": 0, "Ev Idx": 12440 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936221835.544, "dur": 56.021, + "args": { + "External id": 973397,"Record function id": 0, "Ev Idx": 12441 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2338708, "tid": 2338708, + "ts": 6345936221901.495, "dur": 30768.458, + "args": { + "External id": 973398,"Record function id": 0, "Ev Idx": 12442 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338708, "tid": 2338708, + "ts": 6345936221914.589, "dur": 1198.231, + "args": { + "External id": 973399,"Record function id": 0, "Ev Idx": 12443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936222026.102, "dur": 11.603, + "args": { + "External id": 973400,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936222095.294, "dur": 48.172, + "args": { + "External id": 973401,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222104.464, "dur": 3.016, + "args": { + "External id": 973402,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222111.716, "dur": 0.422, + "args": { + "External id": 973403,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222115.094, "dur": 0.388, + "args": { + "External id": 973404,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222116.220, "dur": 0.583, + "args": { + "External id": 973405,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222119.739, "dur": 0.490, + "args": { + "External id": 973406,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222123.113, "dur": 0.750, + "args": { + "External id": 973407,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222124.399, "dur": 5.388, + "args": { + "External id": 973408,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222130.769, "dur": 0.670, + "args": { + "External id": 973409,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222135.693, "dur": 0.626, + "args": { + "External id": 973410,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936222156.262, "dur": 66.615, + "args": { + "External id": 973411,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936222264.057, "dur": 152.264, + "args": { + "External id": 973412,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936222281.912, "dur": 5.438, + "args": { + "External id": 973413,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936222293.544, "dur": 11.455, + "args": { + "External id": 973414,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936222299.151, "dur": 5.397, + "args": { + "External id": 973415,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222302.637, "dur": 0.541, + "args": { + "External id": 973416,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936222312.899, "dur": 38.645, + "args": { + "External id": 973417,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222317.440, "dur": 2.871, + "args": { + "External id": 973418,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222321.264, "dur": 0.588, + "args": { + "External id": 973419,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222322.530, "dur": 0.453, + "args": { + "External id": 973420,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222328.851, "dur": 2.827, + "args": { + "External id": 973421,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222332.301, "dur": 0.366, + "args": { + "External id": 973422,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222333.391, "dur": 0.628, + "args": { + "External id": 973423,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222339.477, "dur": 0.401, + "args": { + "External id": 973424,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222340.620, "dur": 0.342, + "args": { + "External id": 973425,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936222344.220, "dur": 2.136, + "args": { + "External id": 973426,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936222368.725, "dur": 38.115, + "args": { + "External id": 973427,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936222483.301, "dur": 457.283, + "args": { + "External id": 973428,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936222522.005, "dur": 412.470, + "args": { + "External id": 973429,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12473, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936222533.183, "dur": 394.861, + "args": { + "External id": 973430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936222965.005, "dur": 2.881, + "args": { + "External id": 973431,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12475, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338708, "tid": 2338708, + "ts": 6345936223140.950, "dur": 29306.552, + "args": { + "External id": 973432,"Record function id": 0, "Ev Idx": 12476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936223260.924, "dur": 7.990, + "args": { + "External id": 973433,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936223272.706, "dur": 1.097, + "args": { + "External id": 973434,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936223275.625, "dur": 3.041, + "args": { + "External id": 973435,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936223280.659, "dur": 1.009, + "args": { + "External id": 973436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936223283.322, "dur": 0.808, + "args": { + "External id": 973437,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936223288.155, "dur": 0.820, + "args": { + "External id": 973438,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936223292.466, "dur": 0.848, + "args": { + "External id": 973439,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936223294.725, "dur": 2.395, + "args": { + "External id": 973440,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936223298.795, "dur": 0.628, + "args": { + "External id": 973441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936223303.221, "dur": 0.748, + "args": { + "External id": 973442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936223324.500, "dur": 29072.716, + "args": { + "External id": 973443,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936223353.767, "dur": 29034.540, + "args": { + "External id": 973444,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936223371.985, "dur": 18.116, + "args": { + "External id": 973445,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936223393.973, "dur": 28955.325, + "args": { + "External id": 973446,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936223396.805, "dur": 28951.730, + "args": { + "External id": 973447,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936223403.551, "dur": 5.646, + "args": { + "External id": 973448,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936223411.393, "dur": 28933.309, + "args": { + "External id": 973449,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936252602.886, "dur": 36.133, + "args": { + "External id": 973450,"Sequence number": 10552253, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12494 + } + }, + { + "ph": "s", "id": 213, "pid": 2338708, "tid": 2338708, "ts": 6345936252602.886, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936252621.797, "dur": 11.688, + "args": { + "External id": 973451,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936252628.340, "dur": 4.923, + "args": { + "External id": 973452,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936252715.270, "dur": 82.414, + "args": { + "External id": 973453,"Record function id": 0, "Ev Idx": 12497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936252798.892, "dur": 1312.032, + "args": { + "External id": 973454,"Record function id": 0, "Ev Idx": 12498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936252839.622, "dur": 1253.355, + "args": { + "External id": 973455,"Sequence number": 10552254, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12499 + } + }, + { + "ph": "s", "id": 212, "pid": 2338708, "tid": 2338708, "ts": 6345936252839.622, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936252913.832, "dur": 55.279, + "args": { + "External id": 973456,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936252982.479, "dur": 170.222, + "args": { + "External id": 973457,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936253173.344, "dur": 45.582, + "args": { + "External id": 973458,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936253230.350, "dur": 33.291, + "args": { + "External id": 973459,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936253297.952, "dur": 33.547, + "args": { + "External id": 973460,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936253354.549, "dur": 21.555, + "args": { + "External id": 973461,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936253408.799, "dur": 156.507, + "args": { + "External id": 973462,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936253469.422, "dur": 13.056, + "args": { + "External id": 973463,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936253475.432, "dur": 6.180, + "args": { + "External id": 973464,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936253488.137, "dur": 6.196, + "args": { + "External id": 973465,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936253495.539, "dur": 1.174, + "args": { + "External id": 973466,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936253499.512, "dur": 5.415, + "args": { + "External id": 973467,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936253577.837, "dur": 53.059, + "args": { + "External id": 973468,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936253670.004, "dur": 33.160, + "args": { + "External id": 973469,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936253713.426, "dur": 47.455, + "args": { + "External id": 973470,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936253770.347, "dur": 40.034, + "args": { + "External id": 973471,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936253835.445, "dur": 29.936, + "args": { + "External id": 973472,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936253874.357, "dur": 40.355, + "args": { + "External id": 973473,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936253935.083, "dur": 21.087, + "args": { + "External id": 973474,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12518 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2338708, "tid": 2338708, + "ts": 6345936254191.286, "dur": 91.593, + "args": { + "External id": 973475,"Record function id": 0, "Ev Idx": 12519 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936254369.428, "dur": 52.703, + "args": { + "External id": 973476,"Record function id": 0, "Ev Idx": 12520 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2338708, "tid": 2338708, + "ts": 6345936254432.614, "dur": 33690.665, + "args": { + "External id": 973477,"Record function id": 0, "Ev Idx": 12521 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338708, "tid": 2338708, + "ts": 6345936254446.448, "dur": 1078.143, + "args": { + "External id": 973478,"Record function id": 0, "Ev Idx": 12522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936254536.325, "dur": 11.078, + "args": { + "External id": 973479,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936254562.537, "dur": 45.023, + "args": { + "External id": 973480,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254568.656, "dur": 2.933, + "args": { + "External id": 973481,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254576.171, "dur": 0.405, + "args": { + "External id": 973482,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254579.787, "dur": 0.395, + "args": { + "External id": 973483,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254580.989, "dur": 0.833, + "args": { + "External id": 973484,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254587.548, "dur": 0.735, + "args": { + "External id": 973485,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254588.896, "dur": 0.693, + "args": { + "External id": 973486,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254590.538, "dur": 5.359, + "args": { + "External id": 973487,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254598.731, "dur": 0.385, + "args": { + "External id": 973488,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254599.843, "dur": 0.474, + "args": { + "External id": 973489,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936254619.336, "dur": 67.105, + "args": { + "External id": 973490,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936254723.950, "dur": 145.078, + "args": { + "External id": 973491,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936254738.024, "dur": 4.732, + "args": { + "External id": 973492,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936254748.710, "dur": 11.407, + "args": { + "External id": 973493,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936254753.766, "dur": 5.885, + "args": { + "External id": 973494,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254757.862, "dur": 0.501, + "args": { + "External id": 973495,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936254767.890, "dur": 39.056, + "args": { + "External id": 973496,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254772.325, "dur": 3.185, + "args": { + "External id": 973497,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254776.397, "dur": 0.551, + "args": { + "External id": 973498,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254780.114, "dur": 0.637, + "args": { + "External id": 973499,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254783.444, "dur": 2.551, + "args": { + "External id": 973500,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254786.930, "dur": 0.544, + "args": { + "External id": 973501,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254790.337, "dur": 0.520, + "args": { + "External id": 973502,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254794.493, "dur": 0.603, + "args": { + "External id": 973503,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254795.959, "dur": 0.884, + "args": { + "External id": 973504,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936254799.641, "dur": 2.798, + "args": { + "External id": 973505,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936254820.695, "dur": 39.076, + "args": { + "External id": 973506,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936254929.340, "dur": 474.026, + "args": { + "External id": 973507,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936254965.785, "dur": 430.893, + "args": { + "External id": 973508,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12552, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936254976.895, "dur": 411.059, + "args": { + "External id": 973509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936255435.128, "dur": 2.986, + "args": { + "External id": 973510,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12554, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338708, "tid": 2338708, + "ts": 6345936255549.676, "dur": 32299.883, + "args": { + "External id": 973511,"Record function id": 0, "Ev Idx": 12555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936255661.979, "dur": 7.600, + "args": { + "External id": 973512,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936255673.928, "dur": 1.007, + "args": { + "External id": 973513,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936255676.764, "dur": 3.305, + "args": { + "External id": 973514,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936255682.248, "dur": 0.966, + "args": { + "External id": 973515,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936255684.773, "dur": 0.846, + "args": { + "External id": 973516,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936255687.351, "dur": 1.163, + "args": { + "External id": 973517,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936255692.793, "dur": 1.215, + "args": { + "External id": 973518,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936255695.768, "dur": 2.158, + "args": { + "External id": 973519,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936255699.272, "dur": 0.856, + "args": { + "External id": 973520,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936255701.495, "dur": 1.010, + "args": { + "External id": 973521,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936255726.396, "dur": 32073.607, + "args": { + "External id": 973522,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936255743.970, "dur": 32047.048, + "args": { + "External id": 973523,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936255764.650, "dur": 17.564, + "args": { + "External id": 973524,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936255785.943, "dur": 31962.279, + "args": { + "External id": 973525,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936255789.263, "dur": 31958.188, + "args": { + "External id": 973526,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936255797.598, "dur": 7.732, + "args": { + "External id": 973527,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936255807.355, "dur": 31936.189, + "args": { + "External id": 973528,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936288005.221, "dur": 84.481, + "args": { + "External id": 973529,"Sequence number": 10552255, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12573 + } + }, + { + "ph": "s", "id": 211, "pid": 2338708, "tid": 2338708, "ts": 6345936288005.221, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936288041.521, "dur": 10.117, + "args": { + "External id": 973530,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936288045.521, "dur": 5.643, + "args": { + "External id": 973531,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936288173.171, "dur": 85.707, + "args": { + "External id": 973532,"Record function id": 0, "Ev Idx": 12576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936288260.491, "dur": 1292.106, + "args": { + "External id": 973533,"Record function id": 0, "Ev Idx": 12577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936288308.868, "dur": 1227.766, + "args": { + "External id": 973534,"Sequence number": 10552256, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12578 + } + }, + { + "ph": "s", "id": 210, "pid": 2338708, "tid": 2338708, "ts": 6345936288308.868, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936288389.963, "dur": 59.634, + "args": { + "External id": 973535,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936288464.864, "dur": 118.549, + "args": { + "External id": 973536,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936288597.766, "dur": 41.129, + "args": { + "External id": 973537,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936288648.994, "dur": 33.056, + "args": { + "External id": 973538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936288714.346, "dur": 31.114, + "args": { + "External id": 973539,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936288768.303, "dur": 20.886, + "args": { + "External id": 973540,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936288815.046, "dur": 152.598, + "args": { + "External id": 973541,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936288875.309, "dur": 14.508, + "args": { + "External id": 973542,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936288883.004, "dur": 5.805, + "args": { + "External id": 973543,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936288892.468, "dur": 4.835, + "args": { + "External id": 973544,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936288898.631, "dur": 1.210, + "args": { + "External id": 973545,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936288902.574, "dur": 6.267, + "args": { + "External id": 973546,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936288980.630, "dur": 116.518, + "args": { + "External id": 973547,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936289144.597, "dur": 35.672, + "args": { + "External id": 973548,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936289191.805, "dur": 51.950, + "args": { + "External id": 973549,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936289253.769, "dur": 39.467, + "args": { + "External id": 973550,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936289318.348, "dur": 31.066, + "args": { + "External id": 973551,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936289359.644, "dur": 42.308, + "args": { + "External id": 973552,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936289426.891, "dur": 22.712, + "args": { + "External id": 973553,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2338708, "tid": 2338708, + "ts": 6345936289632.111, "dur": 90.351, + "args": { + "External id": 973554,"Record function id": 0, "Ev Idx": 12598 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936289809.464, "dur": 55.706, + "args": { + "External id": 973555,"Record function id": 0, "Ev Idx": 12599 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2338708, "tid": 2338708, + "ts": 6345936289875.971, "dur": 32432.026, + "args": { + "External id": 973556,"Record function id": 0, "Ev Idx": 12600 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338708, "tid": 2338708, + "ts": 6345936289888.468, "dur": 1081.365, + "args": { + "External id": 973557,"Record function id": 0, "Ev Idx": 12601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936289977.254, "dur": 10.188, + "args": { + "External id": 973558,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936290002.405, "dur": 112.230, + "args": { + "External id": 973559,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290029.850, "dur": 2.835, + "args": { + "External id": 973560,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290040.185, "dur": 0.574, + "args": { + "External id": 973561,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290041.846, "dur": 0.352, + "args": { + "External id": 973562,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290043.141, "dur": 0.281, + "args": { + "External id": 973563,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290047.346, "dur": 0.425, + "args": { + "External id": 973564,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290048.771, "dur": 0.510, + "args": { + "External id": 973565,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290051.566, "dur": 50.242, + "args": { + "External id": 973566,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290105.184, "dur": 0.428, + "args": { + "External id": 973567,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290106.451, "dur": 0.520, + "args": { + "External id": 973568,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936290129.839, "dur": 70.080, + "args": { + "External id": 973569,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936290242.885, "dur": 148.207, + "args": { + "External id": 973570,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936290258.524, "dur": 5.882, + "args": { + "External id": 973571,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936290270.612, "dur": 11.277, + "args": { + "External id": 973572,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936290275.918, "dur": 5.484, + "args": { + "External id": 973573,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290279.307, "dur": 0.678, + "args": { + "External id": 973574,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936290292.242, "dur": 34.107, + "args": { + "External id": 973575,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290294.274, "dur": 3.155, + "args": { + "External id": 973576,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290298.601, "dur": 0.536, + "args": { + "External id": 973577,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290301.680, "dur": 0.605, + "args": { + "External id": 973578,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290305.158, "dur": 2.558, + "args": { + "External id": 973579,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290309.998, "dur": 0.334, + "args": { + "External id": 973580,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290311.255, "dur": 0.333, + "args": { + "External id": 973581,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290315.066, "dur": 0.435, + "args": { + "External id": 973582,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290318.112, "dur": 0.280, + "args": { + "External id": 973583,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936290319.092, "dur": 2.669, + "args": { + "External id": 973584,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936290343.739, "dur": 37.859, + "args": { + "External id": 973585,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936290457.758, "dur": 407.421, + "args": { + "External id": 973586,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936290497.102, "dur": 362.227, + "args": { + "External id": 973587,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12631, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936290510.418, "dur": 342.814, + "args": { + "External id": 973588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936290890.810, "dur": 2.858, + "args": { + "External id": 973589,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12633, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338708, "tid": 2338708, + "ts": 6345936290993.707, "dur": 31090.636, + "args": { + "External id": 973590,"Record function id": 0, "Ev Idx": 12634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936291182.772, "dur": 8.388, + "args": { + "External id": 973591,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936291195.808, "dur": 1.228, + "args": { + "External id": 973592,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936291199.334, "dur": 3.406, + "args": { + "External id": 973593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936291206.725, "dur": 1.163, + "args": { + "External id": 973594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936291209.450, "dur": 1.046, + "args": { + "External id": 973595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936291212.045, "dur": 1.003, + "args": { + "External id": 973596,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936291215.036, "dur": 1.013, + "args": { + "External id": 973597,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936291219.845, "dur": 2.356, + "args": { + "External id": 973598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936291223.808, "dur": 0.862, + "args": { + "External id": 973599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936291226.035, "dur": 0.992, + "args": { + "External id": 973600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936291251.090, "dur": 30744.630, + "args": { + "External id": 973601,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936291270.108, "dur": 30716.831, + "args": { + "External id": 973602,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936291290.157, "dur": 19.271, + "args": { + "External id": 973603,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936291313.484, "dur": 30633.194, + "args": { + "External id": 973604,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936291316.604, "dur": 30629.268, + "args": { + "External id": 973605,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936291322.727, "dur": 8.535, + "args": { + "External id": 973606,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936291333.466, "dur": 30608.643, + "args": { + "External id": 973607,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936322240.919, "dur": 36.761, + "args": { + "External id": 973608,"Sequence number": 10552257, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12652 + } + }, + { + "ph": "s", "id": 209, "pid": 2338708, "tid": 2338708, "ts": 6345936322240.919, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936322260.822, "dur": 11.342, + "args": { + "External id": 973609,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936322266.754, "dur": 5.090, + "args": { + "External id": 973610,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936322355.635, "dur": 82.733, + "args": { + "External id": 973611,"Record function id": 0, "Ev Idx": 12655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936322439.910, "dur": 1283.925, + "args": { + "External id": 973612,"Record function id": 0, "Ev Idx": 12656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936322485.511, "dur": 1221.797, + "args": { + "External id": 973613,"Sequence number": 10552258, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12657 + } + }, + { + "ph": "s", "id": 208, "pid": 2338708, "tid": 2338708, "ts": 6345936322485.511, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936322562.277, "dur": 56.293, + "args": { + "External id": 973614,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936322634.730, "dur": 117.841, + "args": { + "External id": 973615,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936322767.248, "dur": 41.026, + "args": { + "External id": 973616,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936322818.599, "dur": 32.113, + "args": { + "External id": 973617,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936322877.385, "dur": 28.450, + "args": { + "External id": 973618,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936322933.246, "dur": 20.000, + "args": { + "External id": 973619,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936322977.450, "dur": 240.382, + "args": { + "External id": 973620,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936323102.703, "dur": 14.936, + "args": { + "External id": 973621,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936323108.915, "dur": 7.491, + "args": { + "External id": 973622,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936323120.808, "dur": 4.637, + "args": { + "External id": 973623,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936323136.834, "dur": 1.205, + "args": { + "External id": 973624,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936323143.188, "dur": 7.634, + "args": { + "External id": 973625,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936323232.581, "dur": 64.069, + "args": { + "External id": 973626,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936323335.806, "dur": 33.954, + "args": { + "External id": 973627,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936323381.782, "dur": 47.875, + "args": { + "External id": 973628,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936323436.808, "dur": 39.282, + "args": { + "External id": 973629,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936323500.925, "dur": 32.100, + "args": { + "External id": 973630,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936323539.665, "dur": 40.540, + "args": { + "External id": 973631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936323601.814, "dur": 20.903, + "args": { + "External id": 973632,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12676 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2338708, "tid": 2338708, + "ts": 6345936323798.754, "dur": 91.041, + "args": { + "External id": 973633,"Record function id": 0, "Ev Idx": 12677 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936323978.324, "dur": 121.123, + "args": { + "External id": 973634,"Record function id": 0, "Ev Idx": 12678 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2338708, "tid": 2338708, + "ts": 6345936324113.415, "dur": 30373.289, + "args": { + "External id": 973635,"Record function id": 0, "Ev Idx": 12679 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338708, "tid": 2338708, + "ts": 6345936324125.995, "dur": 1082.056, + "args": { + "External id": 973636,"Record function id": 0, "Ev Idx": 12680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936324216.571, "dur": 11.445, + "args": { + "External id": 973637,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936324244.124, "dur": 47.149, + "args": { + "External id": 973638,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324252.656, "dur": 2.905, + "args": { + "External id": 973639,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324259.862, "dur": 0.605, + "args": { + "External id": 973640,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324263.640, "dur": 0.403, + "args": { + "External id": 973641,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324264.836, "dur": 0.604, + "args": { + "External id": 973642,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324268.616, "dur": 0.523, + "args": { + "External id": 973643,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324271.361, "dur": 0.452, + "args": { + "External id": 973644,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324272.792, "dur": 5.271, + "args": { + "External id": 973645,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324279.508, "dur": 0.445, + "args": { + "External id": 973646,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324283.676, "dur": 0.537, + "args": { + "External id": 973647,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936324303.831, "dur": 66.585, + "args": { + "External id": 973648,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936324408.477, "dur": 142.570, + "args": { + "External id": 973649,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936324424.456, "dur": 4.455, + "args": { + "External id": 973650,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936324435.066, "dur": 14.348, + "args": { + "External id": 973651,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936324443.403, "dur": 5.542, + "args": { + "External id": 973652,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324447.045, "dur": 0.535, + "args": { + "External id": 973653,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936324457.541, "dur": 36.286, + "args": { + "External id": 973654,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324461.209, "dur": 2.947, + "args": { + "External id": 973655,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324465.097, "dur": 0.533, + "args": { + "External id": 973656,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324466.500, "dur": 0.406, + "args": { + "External id": 973657,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324472.228, "dur": 2.828, + "args": { + "External id": 973658,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324475.961, "dur": 0.367, + "args": { + "External id": 973659,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324477.098, "dur": 0.275, + "args": { + "External id": 973660,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324482.936, "dur": 0.333, + "args": { + "External id": 973661,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324484.005, "dur": 0.371, + "args": { + "External id": 973662,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936324487.115, "dur": 2.269, + "args": { + "External id": 973663,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936324505.085, "dur": 36.555, + "args": { + "External id": 973664,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936324611.675, "dur": 435.060, + "args": { + "External id": 973665,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936324649.986, "dur": 389.726, + "args": { + "External id": 973666,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12710, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936324661.706, "dur": 369.544, + "args": { + "External id": 973667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936325113.162, "dur": 4.247, + "args": { + "External id": 973668,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12712, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338708, "tid": 2338708, + "ts": 6345936325234.317, "dur": 29032.147, + "args": { + "External id": 973669,"Record function id": 0, "Ev Idx": 12713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936325352.718, "dur": 7.898, + "args": { + "External id": 973670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936325364.546, "dur": 1.174, + "args": { + "External id": 973671,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936325367.541, "dur": 3.538, + "args": { + "External id": 973672,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936325373.065, "dur": 0.854, + "args": { + "External id": 973673,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936325375.430, "dur": 0.876, + "args": { + "External id": 973674,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936325377.818, "dur": 0.925, + "args": { + "External id": 973675,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936325383.150, "dur": 0.917, + "args": { + "External id": 973676,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936325385.496, "dur": 2.582, + "args": { + "External id": 973677,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936325389.600, "dur": 0.983, + "args": { + "External id": 973678,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936325392.096, "dur": 0.712, + "args": { + "External id": 973679,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936325429.938, "dur": 28785.950, + "args": { + "External id": 973680,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936325449.948, "dur": 28757.069, + "args": { + "External id": 973681,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936325467.092, "dur": 17.334, + "args": { + "External id": 973682,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936325488.482, "dur": 28676.000, + "args": { + "External id": 973683,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936325491.256, "dur": 28672.418, + "args": { + "External id": 973684,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936325497.771, "dur": 5.519, + "args": { + "External id": 973685,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936325505.181, "dur": 28654.469, + "args": { + "External id": 973686,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936354419.671, "dur": 36.126, + "args": { + "External id": 973687,"Sequence number": 10552259, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12731 + } + }, + { + "ph": "s", "id": 207, "pid": 2338708, "tid": 2338708, "ts": 6345936354419.671, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936354438.750, "dur": 11.562, + "args": { + "External id": 973688,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936354444.965, "dur": 5.068, + "args": { + "External id": 973689,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936354533.889, "dur": 82.482, + "args": { + "External id": 973690,"Record function id": 0, "Ev Idx": 12734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936354617.842, "dur": 1264.980, + "args": { + "External id": 973691,"Record function id": 0, "Ev Idx": 12735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936354663.796, "dur": 1202.768, + "args": { + "External id": 973692,"Sequence number": 10552260, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12736 + } + }, + { + "ph": "s", "id": 206, "pid": 2338708, "tid": 2338708, "ts": 6345936354663.796, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936354738.506, "dur": 53.930, + "args": { + "External id": 973693,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936354807.292, "dur": 114.885, + "args": { + "External id": 973694,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936354937.639, "dur": 40.265, + "args": { + "External id": 973695,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936354988.083, "dur": 53.045, + "args": { + "External id": 973696,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936355118.652, "dur": 35.531, + "args": { + "External id": 973697,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936355181.197, "dur": 20.605, + "args": { + "External id": 973698,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936355227.971, "dur": 152.205, + "args": { + "External id": 973699,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936355289.481, "dur": 13.232, + "args": { + "External id": 973700,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936355295.444, "dur": 6.285, + "args": { + "External id": 973701,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936355306.528, "dur": 4.456, + "args": { + "External id": 973702,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936355312.463, "dur": 1.291, + "args": { + "External id": 973703,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936355316.453, "dur": 5.189, + "args": { + "External id": 973704,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936355393.345, "dur": 63.780, + "args": { + "External id": 973705,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936355498.235, "dur": 33.772, + "args": { + "External id": 973706,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936355543.281, "dur": 48.357, + "args": { + "External id": 973707,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936355600.825, "dur": 39.740, + "args": { + "External id": 973708,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936355667.890, "dur": 27.269, + "args": { + "External id": 973709,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936355704.232, "dur": 39.867, + "args": { + "External id": 973710,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936355764.718, "dur": 21.064, + "args": { + "External id": 973711,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12755 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2338708, "tid": 2338708, + "ts": 6345936355960.283, "dur": 153.316, + "args": { + "External id": 973712,"Record function id": 0, "Ev Idx": 12756 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936356208.903, "dur": 59.353, + "args": { + "External id": 973713,"Record function id": 0, "Ev Idx": 12757 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2338708, "tid": 2338708, + "ts": 6345936356279.307, "dur": 33530.590, + "args": { + "External id": 973714,"Record function id": 0, "Ev Idx": 12758 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338708, "tid": 2338708, + "ts": 6345936356288.845, "dur": 1116.431, + "args": { + "External id": 973715,"Record function id": 0, "Ev Idx": 12759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936356380.783, "dur": 11.758, + "args": { + "External id": 973716,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936356411.086, "dur": 44.475, + "args": { + "External id": 973717,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356417.691, "dur": 2.801, + "args": { + "External id": 973718,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356425.737, "dur": 0.635, + "args": { + "External id": 973719,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356427.731, "dur": 0.762, + "args": { + "External id": 973720,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356430.256, "dur": 0.640, + "args": { + "External id": 973721,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356434.347, "dur": 0.527, + "args": { + "External id": 973722,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356436.592, "dur": 0.608, + "args": { + "External id": 973723,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356438.781, "dur": 5.154, + "args": { + "External id": 973724,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356445.870, "dur": 0.535, + "args": { + "External id": 973725,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356448.061, "dur": 0.545, + "args": { + "External id": 973726,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936356467.730, "dur": 63.764, + "args": { + "External id": 973727,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936356570.750, "dur": 177.128, + "args": { + "External id": 973728,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936356584.532, "dur": 4.599, + "args": { + "External id": 973729,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936356595.395, "dur": 15.486, + "args": { + "External id": 973730,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936356603.802, "dur": 6.600, + "args": { + "External id": 973731,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356608.142, "dur": 0.826, + "args": { + "External id": 973732,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936356619.258, "dur": 40.943, + "args": { + "External id": 973733,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356621.771, "dur": 3.624, + "args": { + "External id": 973734,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356631.798, "dur": 0.579, + "args": { + "External id": 973735,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356634.235, "dur": 0.482, + "args": { + "External id": 973736,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356638.394, "dur": 2.725, + "args": { + "External id": 973737,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356642.606, "dur": 0.427, + "args": { + "External id": 973738,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356644.894, "dur": 2.462, + "args": { + "External id": 973739,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356648.977, "dur": 0.535, + "args": { + "External id": 973740,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356650.902, "dur": 0.441, + "args": { + "External id": 973741,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936356655.231, "dur": 0.437, + "args": { + "External id": 973742,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936356682.561, "dur": 55.920, + "args": { + "External id": 973743,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936356809.151, "dur": 476.472, + "args": { + "External id": 973744,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936356845.555, "dur": 433.882, + "args": { + "External id": 973745,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12789, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936356859.409, "dur": 412.729, + "args": { + "External id": 973746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936357315.092, "dur": 2.978, + "args": { + "External id": 973747,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12791, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338708, "tid": 2338708, + "ts": 6345936357429.330, "dur": 32160.240, + "args": { + "External id": 973748,"Record function id": 0, "Ev Idx": 12792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936357541.051, "dur": 7.367, + "args": { + "External id": 973749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936357552.774, "dur": 1.267, + "args": { + "External id": 973750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936357556.138, "dur": 3.502, + "args": { + "External id": 973751,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936357561.804, "dur": 1.136, + "args": { + "External id": 973752,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936357564.859, "dur": 1.039, + "args": { + "External id": 973753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936357567.638, "dur": 0.895, + "args": { + "External id": 973754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936357572.558, "dur": 0.933, + "args": { + "External id": 973755,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936357574.890, "dur": 2.288, + "args": { + "External id": 973756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936357579.061, "dur": 0.933, + "args": { + "External id": 973757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936357581.862, "dur": 0.576, + "args": { + "External id": 973758,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936357606.966, "dur": 31930.471, + "args": { + "External id": 973759,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936357625.485, "dur": 31902.891, + "args": { + "External id": 973760,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936357644.944, "dur": 18.425, + "args": { + "External id": 973761,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936357667.144, "dur": 31820.140, + "args": { + "External id": 973762,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936357670.189, "dur": 31816.353, + "args": { + "External id": 973763,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936357677.180, "dur": 7.018, + "args": { + "External id": 973764,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936357686.693, "dur": 31796.062, + "args": { + "External id": 973765,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936389746.984, "dur": 34.547, + "args": { + "External id": 973766,"Sequence number": 10552261, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12810 + } + }, + { + "ph": "s", "id": 205, "pid": 2338708, "tid": 2338708, "ts": 6345936389746.984, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936389766.022, "dur": 10.108, + "args": { + "External id": 973767,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936389770.696, "dur": 5.230, + "args": { + "External id": 973768,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936389856.154, "dur": 81.244, + "args": { + "External id": 973769,"Record function id": 0, "Ev Idx": 12813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936389939.287, "dur": 1335.504, + "args": { + "External id": 973770,"Record function id": 0, "Ev Idx": 12814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936389985.029, "dur": 1273.866, + "args": { + "External id": 973771,"Sequence number": 10552262, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12815 + } + }, + { + "ph": "s", "id": 204, "pid": 2338708, "tid": 2338708, "ts": 6345936389985.029, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936390113.668, "dur": 56.294, + "args": { + "External id": 973772,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936390187.292, "dur": 119.185, + "args": { + "External id": 973773,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936390320.844, "dur": 40.466, + "args": { + "External id": 973774,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936390371.191, "dur": 33.880, + "args": { + "External id": 973775,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936390435.972, "dur": 30.836, + "args": { + "External id": 973776,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936390490.860, "dur": 22.922, + "args": { + "External id": 973777,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936390540.613, "dur": 151.709, + "args": { + "External id": 973778,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936390598.182, "dur": 13.195, + "args": { + "External id": 973779,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936390604.522, "dur": 5.946, + "args": { + "External id": 973780,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936390614.866, "dur": 5.816, + "args": { + "External id": 973781,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936390622.490, "dur": 1.489, + "args": { + "External id": 973782,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936390626.845, "dur": 5.948, + "args": { + "External id": 973783,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936390705.338, "dur": 53.085, + "args": { + "External id": 973784,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936390794.538, "dur": 31.299, + "args": { + "External id": 973785,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936390843.388, "dur": 49.935, + "args": { + "External id": 973786,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936390903.462, "dur": 41.857, + "args": { + "External id": 973787,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936390975.852, "dur": 50.420, + "args": { + "External id": 973788,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936391038.500, "dur": 87.377, + "args": { + "External id": 973789,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936391153.944, "dur": 23.450, + "args": { + "External id": 973790,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12834 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2338708, "tid": 2338708, + "ts": 6345936391353.875, "dur": 92.033, + "args": { + "External id": 973791,"Record function id": 0, "Ev Idx": 12835 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936391535.878, "dur": 55.781, + "args": { + "External id": 973792,"Record function id": 0, "Ev Idx": 12836 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2338708, "tid": 2338708, + "ts": 6345936391602.286, "dur": 31332.128, + "args": { + "External id": 973793,"Record function id": 0, "Ev Idx": 12837 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338708, "tid": 2338708, + "ts": 6345936391612.249, "dur": 1104.789, + "args": { + "External id": 973794,"Record function id": 0, "Ev Idx": 12838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936391705.141, "dur": 10.079, + "args": { + "External id": 973795,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936391730.721, "dur": 45.422, + "args": { + "External id": 973796,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391737.837, "dur": 2.799, + "args": { + "External id": 973797,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391746.014, "dur": 0.504, + "args": { + "External id": 973798,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391748.451, "dur": 0.481, + "args": { + "External id": 973799,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391750.963, "dur": 0.588, + "args": { + "External id": 973800,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391755.467, "dur": 0.611, + "args": { + "External id": 973801,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391757.517, "dur": 0.459, + "args": { + "External id": 973802,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391759.698, "dur": 5.282, + "args": { + "External id": 973803,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391766.816, "dur": 0.288, + "args": { + "External id": 973804,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391769.029, "dur": 0.321, + "args": { + "External id": 973805,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936391789.926, "dur": 61.055, + "args": { + "External id": 973806,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936391889.656, "dur": 207.444, + "args": { + "External id": 973807,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936391904.041, "dur": 4.382, + "args": { + "External id": 973808,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936391914.607, "dur": 14.705, + "args": { + "External id": 973809,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936391922.505, "dur": 6.371, + "args": { + "External id": 973810,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391926.976, "dur": 0.566, + "args": { + "External id": 973811,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936391936.844, "dur": 36.520, + "args": { + "External id": 973812,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391939.763, "dur": 3.097, + "args": { + "External id": 973813,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391944.707, "dur": 0.585, + "args": { + "External id": 973814,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391947.183, "dur": 0.557, + "args": { + "External id": 973815,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391951.770, "dur": 2.773, + "args": { + "External id": 973816,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391956.285, "dur": 0.352, + "args": { + "External id": 973817,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391958.045, "dur": 0.509, + "args": { + "External id": 973818,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391962.386, "dur": 0.337, + "args": { + "External id": 973819,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391964.643, "dur": 0.414, + "args": { + "External id": 973820,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936391966.730, "dur": 2.109, + "args": { + "External id": 973821,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936391986.536, "dur": 60.759, + "args": { + "External id": 973822,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936392167.266, "dur": 438.512, + "args": { + "External id": 973823,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936392208.827, "dur": 391.063, + "args": { + "External id": 973824,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12868, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936392252.491, "dur": 340.801, + "args": { + "External id": 973825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936392631.573, "dur": 2.956, + "args": { + "External id": 973826,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12870, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338708, "tid": 2338708, + "ts": 6345936392740.792, "dur": 29976.716, + "args": { + "External id": 973827,"Record function id": 0, "Ev Idx": 12871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936392854.171, "dur": 7.837, + "args": { + "External id": 973828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936392866.971, "dur": 0.931, + "args": { + "External id": 973829,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936392870.099, "dur": 4.110, + "args": { + "External id": 973830,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936392876.560, "dur": 0.966, + "args": { + "External id": 973831,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936392879.595, "dur": 1.072, + "args": { + "External id": 973832,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936392882.719, "dur": 0.851, + "args": { + "External id": 973833,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936392887.806, "dur": 1.039, + "args": { + "External id": 973834,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936392890.392, "dur": 2.331, + "args": { + "External id": 973835,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936392894.869, "dur": 0.733, + "args": { + "External id": 973836,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936392897.668, "dur": 0.883, + "args": { + "External id": 973837,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936392921.692, "dur": 29746.102, + "args": { + "External id": 973838,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936392940.438, "dur": 29718.181, + "args": { + "External id": 973839,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936392958.582, "dur": 19.948, + "args": { + "External id": 973840,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936392982.474, "dur": 29634.844, + "args": { + "External id": 973841,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936392985.598, "dur": 29630.872, + "args": { + "External id": 973842,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936392993.114, "dur": 7.039, + "args": { + "External id": 973843,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936393002.485, "dur": 29609.994, + "args": { + "External id": 973844,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936422871.927, "dur": 33.608, + "args": { + "External id": 973845,"Sequence number": 10552263, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12889 + } + }, + { + "ph": "s", "id": 203, "pid": 2338708, "tid": 2338708, "ts": 6345936422871.927, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936422890.605, "dur": 9.679, + "args": { + "External id": 973846,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936422894.631, "dur": 5.420, + "args": { + "External id": 973847,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936422980.199, "dur": 124.176, + "args": { + "External id": 973848,"Record function id": 0, "Ev Idx": 12892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936423107.756, "dur": 1270.847, + "args": { + "External id": 973849,"Record function id": 0, "Ev Idx": 12893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936423157.316, "dur": 1204.443, + "args": { + "External id": 973850,"Sequence number": 10552264, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12894 + } + }, + { + "ph": "s", "id": 202, "pid": 2338708, "tid": 2338708, "ts": 6345936423157.316, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936423238.657, "dur": 59.843, + "args": { + "External id": 973851,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936423313.338, "dur": 117.839, + "args": { + "External id": 973852,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936423446.175, "dur": 41.038, + "args": { + "External id": 973853,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936423496.915, "dur": 32.380, + "args": { + "External id": 973854,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936423560.039, "dur": 30.394, + "args": { + "External id": 973855,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936423613.047, "dur": 19.462, + "args": { + "External id": 973856,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936423659.187, "dur": 152.715, + "args": { + "External id": 973857,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936423719.121, "dur": 13.306, + "args": { + "External id": 973858,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936423725.404, "dur": 6.162, + "args": { + "External id": 973859,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936423735.967, "dur": 5.973, + "args": { + "External id": 973860,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936423743.726, "dur": 1.294, + "args": { + "External id": 973861,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936423748.581, "dur": 5.151, + "args": { + "External id": 973862,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936423824.262, "dur": 50.438, + "args": { + "External id": 973863,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936423911.628, "dur": 30.533, + "args": { + "External id": 973864,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936423952.417, "dur": 46.024, + "args": { + "External id": 973865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936424026.921, "dur": 85.354, + "args": { + "External id": 973866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936424147.135, "dur": 28.693, + "args": { + "External id": 973867,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936424186.335, "dur": 45.902, + "args": { + "External id": 973868,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936424251.752, "dur": 21.963, + "args": { + "External id": 973869,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12913 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2338708, "tid": 2338708, + "ts": 6345936424457.760, "dur": 93.958, + "args": { + "External id": 973870,"Record function id": 0, "Ev Idx": 12914 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936424642.615, "dur": 57.043, + "args": { + "External id": 973871,"Record function id": 0, "Ev Idx": 12915 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2338708, "tid": 2338708, + "ts": 6345936424710.519, "dur": 30387.593, + "args": { + "External id": 973872,"Record function id": 0, "Ev Idx": 12916 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338708, "tid": 2338708, + "ts": 6345936424721.570, "dur": 1082.528, + "args": { + "External id": 973873,"Record function id": 0, "Ev Idx": 12917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936424812.641, "dur": 9.386, + "args": { + "External id": 973874,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936424837.738, "dur": 44.708, + "args": { + "External id": 973875,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936424844.150, "dur": 2.858, + "args": { + "External id": 973876,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936424852.073, "dur": 0.469, + "args": { + "External id": 973877,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936424854.042, "dur": 0.629, + "args": { + "External id": 973878,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936424856.018, "dur": 0.584, + "args": { + "External id": 973879,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936424860.244, "dur": 0.530, + "args": { + "External id": 973880,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936424862.532, "dur": 0.718, + "args": { + "External id": 973881,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936424865.259, "dur": 5.159, + "args": { + "External id": 973882,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936424872.008, "dur": 0.288, + "args": { + "External id": 973883,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936424874.493, "dur": 0.597, + "args": { + "External id": 973884,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936424899.901, "dur": 61.771, + "args": { + "External id": 973885,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936425000.164, "dur": 221.103, + "args": { + "External id": 973886,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 12930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936425036.734, "dur": 8.517, + "args": { + "External id": 973887,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936425089.276, "dur": 18.513, + "args": { + "External id": 973888,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936425098.040, "dur": 9.166, + "args": { + "External id": 973889,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 12933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936425103.405, "dur": 1.228, + "args": { + "External id": 973890,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 12934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936425118.378, "dur": 36.516, + "args": { + "External id": 973891,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936425121.919, "dur": 0.502, + "args": { + "External id": 973892,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936425124.365, "dur": 2.477, + "args": { + "External id": 973893,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936425128.751, "dur": 0.705, + "args": { + "External id": 973894,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936425131.139, "dur": 3.411, + "args": { + "External id": 973895,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936425138.321, "dur": 0.690, + "args": { + "External id": 973896,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936425140.807, "dur": 0.479, + "args": { + "External id": 973897,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936425143.116, "dur": 0.689, + "args": { + "External id": 973898,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936425147.896, "dur": 0.375, + "args": { + "External id": 973899,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936425149.620, "dur": 0.380, + "args": { + "External id": 973900,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936425171.319, "dur": 40.101, + "args": { + "External id": 973901,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 12945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936425288.617, "dur": 410.759, + "args": { + "External id": 973902,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 12946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936425327.193, "dur": 366.215, + "args": { + "External id": 973903,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 12947, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936425339.239, "dur": 347.997, + "args": { + "External id": 973904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 12948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936425725.443, "dur": 2.648, + "args": { + "External id": 973905,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 12949, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338708, "tid": 2338708, + "ts": 6345936425829.012, "dur": 28997.259, + "args": { + "External id": 973906,"Record function id": 0, "Ev Idx": 12950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936425939.072, "dur": 6.798, + "args": { + "External id": 973907,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 12951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936425951.027, "dur": 1.362, + "args": { + "External id": 973908,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936425954.540, "dur": 2.941, + "args": { + "External id": 973909,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936425959.841, "dur": 1.050, + "args": { + "External id": 973910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936425962.580, "dur": 0.982, + "args": { + "External id": 973911,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 12955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936425965.267, "dur": 1.033, + "args": { + "External id": 973912,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 12956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936425970.257, "dur": 0.955, + "args": { + "External id": 973913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 12957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936425973.158, "dur": 2.543, + "args": { + "External id": 973914,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936425977.664, "dur": 0.528, + "args": { + "External id": 973915,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936425980.078, "dur": 0.852, + "args": { + "External id": 973916,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 12960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936426004.512, "dur": 28770.082, + "args": { + "External id": 973917,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936426044.706, "dur": 28720.740, + "args": { + "External id": 973918,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 12962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936426106.226, "dur": 21.283, + "args": { + "External id": 973919,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936426132.208, "dur": 28593.247, + "args": { + "External id": 973920,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 12964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936426135.540, "dur": 28589.245, + "args": { + "External id": 973921,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 12965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936426142.776, "dur": 7.627, + "args": { + "External id": 973922,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936426152.546, "dur": 28568.554, + "args": { + "External id": 973923,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 12967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936454985.076, "dur": 49.384, + "args": { + "External id": 973924,"Sequence number": 10552265, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 12968 + } + }, + { + "ph": "s", "id": 201, "pid": 2338708, "tid": 2338708, "ts": 6345936454985.076, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936455005.284, "dur": 23.036, + "args": { + "External id": 973925,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 12969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936455022.126, "dur": 5.647, + "args": { + "External id": 973926,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 12970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936455148.748, "dur": 80.399, + "args": { + "External id": 973927,"Record function id": 0, "Ev Idx": 12971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936455231.477, "dur": 1260.242, + "args": { + "External id": 973928,"Record function id": 0, "Ev Idx": 12972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936455278.569, "dur": 1197.109, + "args": { + "External id": 973929,"Sequence number": 10552266, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 12973 + } + }, + { + "ph": "s", "id": 200, "pid": 2338708, "tid": 2338708, "ts": 6345936455278.569, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936455357.225, "dur": 57.703, + "args": { + "External id": 973930,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936455429.644, "dur": 117.008, + "args": { + "External id": 973931,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936455561.374, "dur": 40.791, + "args": { + "External id": 973932,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936455611.729, "dur": 33.283, + "args": { + "External id": 973933,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 12977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936455676.121, "dur": 30.214, + "args": { + "External id": 973934,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936455729.277, "dur": 20.817, + "args": { + "External id": 973935,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 12979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936455775.758, "dur": 151.850, + "args": { + "External id": 973936,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 12980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936455835.772, "dur": 13.382, + "args": { + "External id": 973937,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 12981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936455842.547, "dur": 5.631, + "args": { + "External id": 973938,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936455852.493, "dur": 4.258, + "args": { + "External id": 973939,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936455858.619, "dur": 1.125, + "args": { + "External id": 973940,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936455862.647, "dur": 6.735, + "args": { + "External id": 973941,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936455939.705, "dur": 52.170, + "args": { + "External id": 973942,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 12986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936456048.217, "dur": 73.506, + "args": { + "External id": 973943,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 12987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936456138.739, "dur": 53.251, + "args": { + "External id": 973944,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936456201.061, "dur": 39.785, + "args": { + "External id": 973945,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 12989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936456267.927, "dur": 30.494, + "args": { + "External id": 973946,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 12990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936456308.418, "dur": 39.082, + "args": { + "External id": 973947,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 12991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936456370.044, "dur": 20.335, + "args": { + "External id": 973948,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 12992 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2338708, "tid": 2338708, + "ts": 6345936456570.323, "dur": 93.317, + "args": { + "External id": 973949,"Record function id": 0, "Ev Idx": 12993 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936456751.730, "dur": 54.589, + "args": { + "External id": 973950,"Record function id": 0, "Ev Idx": 12994 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2338708, "tid": 2338708, + "ts": 6345936456816.913, "dur": 30010.823, + "args": { + "External id": 973951,"Record function id": 0, "Ev Idx": 12995 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338708, "tid": 2338708, + "ts": 6345936456825.826, "dur": 1069.069, + "args": { + "External id": 973952,"Record function id": 0, "Ev Idx": 12996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936456914.302, "dur": 9.870, + "args": { + "External id": 973953,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936456942.490, "dur": 46.294, + "args": { + "External id": 973954,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 12998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936456949.215, "dur": 2.686, + "args": { + "External id": 973955,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 12999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936456957.355, "dur": 0.608, + "args": { + "External id": 973956,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936456959.453, "dur": 0.520, + "args": { + "External id": 973957,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936456961.685, "dur": 2.762, + "args": { + "External id": 973958,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936456966.665, "dur": 0.860, + "args": { + "External id": 973959,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936456969.488, "dur": 0.640, + "args": { + "External id": 973960,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936456974.444, "dur": 2.826, + "args": { + "External id": 973961,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936456978.826, "dur": 0.677, + "args": { + "External id": 973962,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936456981.253, "dur": 0.296, + "args": { + "External id": 973963,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936457001.742, "dur": 122.664, + "args": { + "External id": 973964,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936457170.442, "dur": 147.597, + "args": { + "External id": 973965,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936457187.481, "dur": 6.586, + "args": { + "External id": 973966,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936457200.564, "dur": 18.806, + "args": { + "External id": 973967,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936457209.006, "dur": 9.830, + "args": { + "External id": 973968,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936457213.600, "dur": 3.031, + "args": { + "External id": 973969,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936457227.663, "dur": 31.413, + "args": { + "External id": 973970,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936457229.997, "dur": 0.393, + "args": { + "External id": 973971,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936457232.237, "dur": 0.984, + "args": { + "External id": 973972,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936457234.625, "dur": 0.460, + "args": { + "External id": 973973,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936457238.476, "dur": 3.079, + "args": { + "External id": 973974,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936457242.675, "dur": 0.544, + "args": { + "External id": 973975,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936457244.503, "dur": 2.844, + "args": { + "External id": 973976,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936457248.524, "dur": 0.509, + "args": { + "External id": 973977,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936457250.508, "dur": 0.513, + "args": { + "External id": 973978,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936457254.494, "dur": 0.357, + "args": { + "External id": 973979,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936457272.397, "dur": 36.191, + "args": { + "External id": 973980,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936457383.952, "dur": 404.789, + "args": { + "External id": 973981,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936457422.141, "dur": 361.026, + "args": { + "External id": 973982,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13026, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936457434.183, "dur": 342.474, + "args": { + "External id": 973983,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936457814.661, "dur": 2.729, + "args": { + "External id": 973984,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13028, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338708, "tid": 2338708, + "ts": 6345936457919.754, "dur": 28682.331, + "args": { + "External id": 973985,"Record function id": 0, "Ev Idx": 13029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936458053.753, "dur": 43.956, + "args": { + "External id": 973986,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936458106.146, "dur": 1.386, + "args": { + "External id": 973987,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936458109.559, "dur": 3.200, + "args": { + "External id": 973988,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936458114.603, "dur": 0.823, + "args": { + "External id": 973989,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936458117.370, "dur": 0.839, + "args": { + "External id": 973990,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936458119.664, "dur": 1.284, + "args": { + "External id": 973991,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936458124.943, "dur": 1.220, + "args": { + "External id": 973992,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936458127.802, "dur": 2.622, + "args": { + "External id": 973993,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936458131.919, "dur": 1.078, + "args": { + "External id": 973994,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936458134.552, "dur": 0.604, + "args": { + "External id": 973995,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936458160.919, "dur": 28386.335, + "args": { + "External id": 973996,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936458178.805, "dur": 28359.479, + "args": { + "External id": 973997,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936458201.966, "dur": 18.630, + "args": { + "External id": 973998,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936458224.825, "dur": 28273.237, + "args": { + "External id": 973999,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936458228.158, "dur": 28269.241, + "args": { + "External id": 974000,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936458234.913, "dur": 7.420, + "args": { + "External id": 974001,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936458244.460, "dur": 28249.009, + "args": { + "External id": 974002,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936486763.103, "dur": 34.097, + "args": { + "External id": 974003,"Sequence number": 10552267, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13047 + } + }, + { + "ph": "s", "id": 199, "pid": 2338708, "tid": 2338708, "ts": 6345936486763.103, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936486781.588, "dur": 9.970, + "args": { + "External id": 974004,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936486785.882, "dur": 5.420, + "args": { + "External id": 974005,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936486875.034, "dur": 82.592, + "args": { + "External id": 974006,"Record function id": 0, "Ev Idx": 13050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936486959.420, "dur": 1327.639, + "args": { + "External id": 974007,"Record function id": 0, "Ev Idx": 13051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936487005.601, "dur": 1265.284, + "args": { + "External id": 974008,"Sequence number": 10552268, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13052 + } + }, + { + "ph": "s", "id": 198, "pid": 2338708, "tid": 2338708, "ts": 6345936487005.601, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936487133.638, "dur": 57.030, + "args": { + "External id": 974009,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936487206.131, "dur": 117.879, + "args": { + "External id": 974010,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936487338.641, "dur": 41.191, + "args": { + "External id": 974011,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936487389.581, "dur": 32.811, + "args": { + "External id": 974012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936487451.110, "dur": 30.727, + "args": { + "External id": 974013,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936487503.288, "dur": 19.554, + "args": { + "External id": 974014,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936487548.146, "dur": 151.357, + "args": { + "External id": 974015,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936487606.767, "dur": 13.023, + "args": { + "External id": 974016,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936487612.980, "dur": 5.971, + "args": { + "External id": 974017,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936487623.285, "dur": 6.244, + "args": { + "External id": 974018,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936487630.918, "dur": 1.003, + "args": { + "External id": 974019,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936487635.125, "dur": 5.277, + "args": { + "External id": 974020,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936487712.690, "dur": 56.668, + "args": { + "External id": 974021,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936487806.004, "dur": 32.850, + "args": { + "External id": 974022,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936487849.526, "dur": 47.830, + "args": { + "External id": 974023,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936487906.205, "dur": 40.296, + "args": { + "External id": 974024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936487982.473, "dur": 51.537, + "args": { + "External id": 974025,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936488046.304, "dur": 89.304, + "args": { + "External id": 974026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936488163.276, "dur": 24.220, + "args": { + "External id": 974027,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13071 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2338708, "tid": 2338708, + "ts": 6345936488367.375, "dur": 92.677, + "args": { + "External id": 974028,"Record function id": 0, "Ev Idx": 13072 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936488546.126, "dur": 56.122, + "args": { + "External id": 974029,"Record function id": 0, "Ev Idx": 13073 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2338708, "tid": 2338708, + "ts": 6345936488611.986, "dur": 31638.593, + "args": { + "External id": 974030,"Record function id": 0, "Ev Idx": 13074 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338708, "tid": 2338708, + "ts": 6345936488621.473, "dur": 1056.162, + "args": { + "External id": 974031,"Record function id": 0, "Ev Idx": 13075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936488712.221, "dur": 10.211, + "args": { + "External id": 974032,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936488737.686, "dur": 43.152, + "args": { + "External id": 974033,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488744.091, "dur": 3.001, + "args": { + "External id": 974034,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488752.265, "dur": 0.424, + "args": { + "External id": 974035,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488754.209, "dur": 0.378, + "args": { + "External id": 974036,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488755.885, "dur": 0.767, + "args": { + "External id": 974037,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488760.531, "dur": 0.404, + "args": { + "External id": 974038,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488762.192, "dur": 0.864, + "args": { + "External id": 974039,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488764.683, "dur": 4.874, + "args": { + "External id": 974040,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488770.995, "dur": 0.424, + "args": { + "External id": 974041,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488772.906, "dur": 0.570, + "args": { + "External id": 974042,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936488792.359, "dur": 60.017, + "args": { + "External id": 974043,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936488894.254, "dur": 205.751, + "args": { + "External id": 974044,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936488908.873, "dur": 4.868, + "args": { + "External id": 974045,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936488920.880, "dur": 12.604, + "args": { + "External id": 974046,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936488926.485, "dur": 6.536, + "args": { + "External id": 974047,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488930.462, "dur": 1.163, + "args": { + "External id": 974048,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936488941.035, "dur": 34.313, + "args": { + "External id": 974049,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488943.612, "dur": 3.647, + "args": { + "External id": 974050,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488948.696, "dur": 0.484, + "args": { + "External id": 974051,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488950.912, "dur": 0.579, + "args": { + "External id": 974052,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488955.487, "dur": 2.782, + "args": { + "External id": 974053,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488960.312, "dur": 0.537, + "args": { + "External id": 974054,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488962.155, "dur": 0.656, + "args": { + "External id": 974055,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488964.346, "dur": 0.667, + "args": { + "External id": 974056,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488966.240, "dur": 0.591, + "args": { + "External id": 974057,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936488968.167, "dur": 2.651, + "args": { + "External id": 974058,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936488988.475, "dur": 60.041, + "args": { + "External id": 974059,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936489169.509, "dur": 405.847, + "args": { + "External id": 974060,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936489207.118, "dur": 362.637, + "args": { + "External id": 974061,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13105, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936489219.292, "dur": 344.030, + "args": { + "External id": 974062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936489599.066, "dur": 2.543, + "args": { + "External id": 974063,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13107, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338708, "tid": 2338708, + "ts": 6345936489702.389, "dur": 30280.850, + "args": { + "External id": 974064,"Record function id": 0, "Ev Idx": 13108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936489811.396, "dur": 7.909, + "args": { + "External id": 974065,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936489824.202, "dur": 1.626, + "args": { + "External id": 974066,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936489827.666, "dur": 3.499, + "args": { + "External id": 974067,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936489833.369, "dur": 1.067, + "args": { + "External id": 974068,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936489836.063, "dur": 1.283, + "args": { + "External id": 974069,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936489839.086, "dur": 0.881, + "args": { + "External id": 974070,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936489844.145, "dur": 0.841, + "args": { + "External id": 974071,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936489846.567, "dur": 2.173, + "args": { + "External id": 974072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936489850.236, "dur": 0.954, + "args": { + "External id": 974073,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936489853.077, "dur": 0.621, + "args": { + "External id": 974074,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936489876.343, "dur": 30057.828, + "args": { + "External id": 974075,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936489893.896, "dur": 30031.196, + "args": { + "External id": 974076,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936489915.421, "dur": 19.174, + "args": { + "External id": 974077,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936489938.949, "dur": 29946.382, + "args": { + "External id": 974078,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936489942.345, "dur": 29942.280, + "args": { + "External id": 974079,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936489949.072, "dur": 5.489, + "args": { + "External id": 974080,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936489956.551, "dur": 29924.129, + "args": { + "External id": 974081,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936520184.982, "dur": 34.133, + "args": { + "External id": 974082,"Sequence number": 10552269, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13126 + } + }, + { + "ph": "s", "id": 197, "pid": 2338708, "tid": 2338708, "ts": 6345936520184.982, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936520203.502, "dur": 9.990, + "args": { + "External id": 974083,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936520207.756, "dur": 5.338, + "args": { + "External id": 974084,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936520296.736, "dur": 82.610, + "args": { + "External id": 974085,"Record function id": 0, "Ev Idx": 13129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936520381.021, "dur": 1258.593, + "args": { + "External id": 974086,"Record function id": 0, "Ev Idx": 13130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936520425.694, "dur": 1197.295, + "args": { + "External id": 974087,"Sequence number": 10552270, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13131 + } + }, + { + "ph": "s", "id": 196, "pid": 2338708, "tid": 2338708, "ts": 6345936520425.694, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936520499.413, "dur": 57.922, + "args": { + "External id": 974088,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936520572.144, "dur": 116.344, + "args": { + "External id": 974089,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936520701.556, "dur": 40.036, + "args": { + "External id": 974090,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936520752.927, "dur": 32.628, + "args": { + "External id": 974091,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936520815.107, "dur": 27.597, + "args": { + "External id": 974092,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936520864.338, "dur": 19.126, + "args": { + "External id": 974093,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936520907.783, "dur": 213.809, + "args": { + "External id": 974094,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936520966.206, "dur": 13.029, + "args": { + "External id": 974095,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936520972.417, "dur": 5.837, + "args": { + "External id": 974096,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936520982.898, "dur": 5.766, + "args": { + "External id": 974097,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936520990.063, "dur": 1.701, + "args": { + "External id": 974098,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936520994.858, "dur": 5.078, + "args": { + "External id": 974099,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936521137.294, "dur": 66.450, + "args": { + "External id": 974100,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936521242.101, "dur": 34.428, + "args": { + "External id": 974101,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936521289.519, "dur": 49.075, + "args": { + "External id": 974102,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936521348.200, "dur": 39.519, + "args": { + "External id": 974103,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936521414.961, "dur": 27.861, + "args": { + "External id": 974104,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936521452.145, "dur": 40.826, + "args": { + "External id": 974105,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936521513.809, "dur": 22.158, + "args": { + "External id": 974106,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13150 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2338708, "tid": 2338708, + "ts": 6345936521716.977, "dur": 87.848, + "args": { + "External id": 974107,"Record function id": 0, "Ev Idx": 13151 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936521891.114, "dur": 54.267, + "args": { + "External id": 974108,"Record function id": 0, "Ev Idx": 13152 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2338708, "tid": 2338708, + "ts": 6345936521955.824, "dur": 30317.223, + "args": { + "External id": 974109,"Record function id": 0, "Ev Idx": 13153 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338708, "tid": 2338708, + "ts": 6345936521964.560, "dur": 1135.645, + "args": { + "External id": 974110,"Record function id": 0, "Ev Idx": 13154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936522120.434, "dur": 11.731, + "args": { + "External id": 974111,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936522149.142, "dur": 50.494, + "args": { + "External id": 974112,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522161.747, "dur": 2.972, + "args": { + "External id": 974113,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522169.416, "dur": 0.612, + "args": { + "External id": 974114,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522171.173, "dur": 0.461, + "args": { + "External id": 974115,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522172.815, "dur": 2.430, + "args": { + "External id": 974116,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522176.381, "dur": 0.360, + "args": { + "External id": 974117,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522179.921, "dur": 0.541, + "args": { + "External id": 974118,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522183.664, "dur": 2.659, + "args": { + "External id": 974119,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522187.485, "dur": 0.410, + "args": { + "External id": 974120,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522189.697, "dur": 2.639, + "args": { + "External id": 974121,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936522212.526, "dur": 64.081, + "args": { + "External id": 974122,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936522317.739, "dur": 144.672, + "args": { + "External id": 974123,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936522332.675, "dur": 5.414, + "args": { + "External id": 974124,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936522346.975, "dur": 12.647, + "args": { + "External id": 974125,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936522352.568, "dur": 6.606, + "args": { + "External id": 974126,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522356.854, "dur": 0.890, + "args": { + "External id": 974127,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936522367.877, "dur": 35.693, + "args": { + "External id": 974128,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522370.920, "dur": 0.499, + "args": { + "External id": 974129,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522373.941, "dur": 0.794, + "args": { + "External id": 974130,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522376.414, "dur": 2.861, + "args": { + "External id": 974131,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522380.706, "dur": 2.723, + "args": { + "External id": 974132,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522385.238, "dur": 0.432, + "args": { + "External id": 974133,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522390.387, "dur": 0.336, + "args": { + "External id": 974134,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522391.981, "dur": 0.518, + "args": { + "External id": 974135,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522394.215, "dur": 0.564, + "args": { + "External id": 974136,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936522398.249, "dur": 0.503, + "args": { + "External id": 974137,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936522415.630, "dur": 36.804, + "args": { + "External id": 974138,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936522528.795, "dur": 404.355, + "args": { + "External id": 974139,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936522564.439, "dur": 362.615, + "args": { + "External id": 974140,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13184, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936522576.519, "dur": 342.218, + "args": { + "External id": 974141,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936522959.364, "dur": 3.076, + "args": { + "External id": 974142,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13186, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338708, "tid": 2338708, + "ts": 6345936523127.962, "dur": 28875.747, + "args": { + "External id": 974143,"Record function id": 0, "Ev Idx": 13187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936523247.425, "dur": 7.493, + "args": { + "External id": 974144,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936523259.205, "dur": 1.144, + "args": { + "External id": 974145,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936523262.635, "dur": 3.480, + "args": { + "External id": 974146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936523267.772, "dur": 0.783, + "args": { + "External id": 974147,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936523270.259, "dur": 1.228, + "args": { + "External id": 974148,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936523273.049, "dur": 0.970, + "args": { + "External id": 974149,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936523279.026, "dur": 0.719, + "args": { + "External id": 974150,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936523281.545, "dur": 2.568, + "args": { + "External id": 974151,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936523285.952, "dur": 0.776, + "args": { + "External id": 974152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936523288.367, "dur": 0.891, + "args": { + "External id": 974153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936523312.289, "dur": 28641.898, + "args": { + "External id": 974154,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936523330.486, "dur": 28614.993, + "args": { + "External id": 974155,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936523349.909, "dur": 19.919, + "args": { + "External id": 974156,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936523373.845, "dur": 28530.627, + "args": { + "External id": 974157,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936523377.012, "dur": 28526.714, + "args": { + "External id": 974158,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936523383.935, "dur": 6.009, + "args": { + "External id": 974159,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936523392.420, "dur": 28507.653, + "args": { + "External id": 974160,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936552205.103, "dur": 36.421, + "args": { + "External id": 974161,"Sequence number": 10552271, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13205 + } + }, + { + "ph": "s", "id": 195, "pid": 2338708, "tid": 2338708, "ts": 6345936552205.103, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936552225.872, "dur": 9.964, + "args": { + "External id": 974162,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936552230.266, "dur": 5.200, + "args": { + "External id": 974163,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936552318.863, "dur": 82.898, + "args": { + "External id": 974164,"Record function id": 0, "Ev Idx": 13208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936552403.567, "dur": 1274.869, + "args": { + "External id": 974165,"Record function id": 0, "Ev Idx": 13209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936552449.957, "dur": 1211.240, + "args": { + "External id": 974166,"Sequence number": 10552272, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13210 + } + }, + { + "ph": "s", "id": 194, "pid": 2338708, "tid": 2338708, "ts": 6345936552449.957, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936552526.934, "dur": 57.533, + "args": { + "External id": 974167,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936552597.984, "dur": 113.632, + "args": { + "External id": 974168,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936552726.147, "dur": 43.736, + "args": { + "External id": 974169,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936552780.827, "dur": 32.775, + "args": { + "External id": 974170,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936552840.974, "dur": 30.542, + "args": { + "External id": 974171,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936552894.846, "dur": 17.997, + "args": { + "External id": 974172,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936552938.988, "dur": 223.451, + "args": { + "External id": 974173,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936553000.936, "dur": 33.689, + "args": { + "External id": 974174,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936553007.072, "dur": 26.253, + "args": { + "External id": 974175,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936553039.043, "dur": 4.427, + "args": { + "External id": 974176,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936553045.096, "dur": 1.287, + "args": { + "External id": 974177,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936553089.920, "dur": 7.231, + "args": { + "External id": 974178,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936553177.596, "dur": 64.491, + "args": { + "External id": 974179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936553278.459, "dur": 36.677, + "args": { + "External id": 974180,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936553328.021, "dur": 49.730, + "args": { + "External id": 974181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936553384.656, "dur": 41.538, + "args": { + "External id": 974182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936553451.224, "dur": 32.767, + "args": { + "External id": 974183,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936553490.327, "dur": 42.503, + "args": { + "External id": 974184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936553555.117, "dur": 23.013, + "args": { + "External id": 974185,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13229 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2338708, "tid": 2338708, + "ts": 6345936553755.847, "dur": 86.877, + "args": { + "External id": 974186,"Record function id": 0, "Ev Idx": 13230 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936553926.345, "dur": 55.779, + "args": { + "External id": 974187,"Record function id": 0, "Ev Idx": 13231 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2338708, "tid": 2338708, + "ts": 6345936553992.440, "dur": 32488.172, + "args": { + "External id": 974188,"Record function id": 0, "Ev Idx": 13232 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338708, "tid": 2338708, + "ts": 6345936554001.427, "dur": 1140.383, + "args": { + "External id": 974189,"Record function id": 0, "Ev Idx": 13233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936554152.172, "dur": 11.982, + "args": { + "External id": 974190,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936554180.623, "dur": 45.280, + "args": { + "External id": 974191,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554187.974, "dur": 2.932, + "args": { + "External id": 974192,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554195.947, "dur": 0.433, + "args": { + "External id": 974193,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554198.107, "dur": 0.727, + "args": { + "External id": 974194,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554200.473, "dur": 0.393, + "args": { + "External id": 974195,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554204.919, "dur": 0.649, + "args": { + "External id": 974196,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554207.350, "dur": 0.640, + "args": { + "External id": 974197,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554209.972, "dur": 3.878, + "args": { + "External id": 974198,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554215.324, "dur": 0.759, + "args": { + "External id": 974199,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554218.016, "dur": 0.750, + "args": { + "External id": 974200,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936554238.563, "dur": 64.574, + "args": { + "External id": 974201,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936554346.513, "dur": 157.384, + "args": { + "External id": 974202,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936554360.373, "dur": 4.800, + "args": { + "External id": 974203,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936554371.027, "dur": 12.356, + "args": { + "External id": 974204,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936554376.466, "dur": 6.470, + "args": { + "External id": 974205,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554380.793, "dur": 0.857, + "args": { + "External id": 974206,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936554392.254, "dur": 37.960, + "args": { + "External id": 974207,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554395.485, "dur": 3.043, + "args": { + "External id": 974208,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554400.819, "dur": 0.567, + "args": { + "External id": 974209,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554403.170, "dur": 0.562, + "args": { + "External id": 974210,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554407.252, "dur": 2.613, + "args": { + "External id": 974211,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554411.856, "dur": 0.618, + "args": { + "External id": 974212,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554413.961, "dur": 0.307, + "args": { + "External id": 974213,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554417.927, "dur": 0.592, + "args": { + "External id": 974214,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554420.160, "dur": 0.614, + "args": { + "External id": 974215,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936554422.640, "dur": 2.501, + "args": { + "External id": 974216,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936554453.104, "dur": 40.968, + "args": { + "External id": 974217,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936554567.176, "dur": 401.553, + "args": { + "External id": 974218,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936554603.028, "dur": 360.199, + "args": { + "External id": 974219,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13263, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936554617.108, "dur": 340.132, + "args": { + "External id": 974220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936554994.349, "dur": 2.613, + "args": { + "External id": 974221,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13265, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338708, "tid": 2338708, + "ts": 6345936555169.781, "dur": 31086.319, + "args": { + "External id": 974222,"Record function id": 0, "Ev Idx": 13266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936555284.400, "dur": 8.156, + "args": { + "External id": 974223,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936555296.902, "dur": 1.288, + "args": { + "External id": 974224,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936555300.029, "dur": 3.542, + "args": { + "External id": 974225,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936555305.355, "dur": 1.003, + "args": { + "External id": 974226,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936555307.752, "dur": 0.796, + "args": { + "External id": 974227,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936555313.029, "dur": 1.141, + "args": { + "External id": 974228,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936555316.270, "dur": 0.954, + "args": { + "External id": 974229,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936555319.129, "dur": 2.875, + "args": { + "External id": 974230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936555323.663, "dur": 0.900, + "args": { + "External id": 974231,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936555328.489, "dur": 1.067, + "args": { + "External id": 974232,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936555351.753, "dur": 30854.082, + "args": { + "External id": 974233,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936555371.537, "dur": 30825.322, + "args": { + "External id": 974234,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936555393.017, "dur": 19.220, + "args": { + "External id": 974235,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936555416.443, "dur": 30737.854, + "args": { + "External id": 974236,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936555419.498, "dur": 30734.001, + "args": { + "External id": 974237,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936555427.002, "dur": 5.743, + "args": { + "External id": 974238,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936555434.949, "dur": 30715.064, + "args": { + "External id": 974239,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936586409.847, "dur": 38.576, + "args": { + "External id": 974240,"Sequence number": 10552273, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13284 + } + }, + { + "ph": "s", "id": 193, "pid": 2338708, "tid": 2338708, "ts": 6345936586409.847, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936586432.763, "dur": 10.145, + "args": { + "External id": 974241,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936586437.526, "dur": 5.157, + "args": { + "External id": 974242,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936586528.534, "dur": 81.211, + "args": { + "External id": 974243,"Record function id": 0, "Ev Idx": 13287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936586611.721, "dur": 1273.745, + "args": { + "External id": 974244,"Record function id": 0, "Ev Idx": 13288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936586658.178, "dur": 1210.852, + "args": { + "External id": 974245,"Sequence number": 10552274, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13289 + } + }, + { + "ph": "s", "id": 192, "pid": 2338708, "tid": 2338708, "ts": 6345936586658.178, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936586731.095, "dur": 52.445, + "args": { + "External id": 974246,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936586797.846, "dur": 115.034, + "args": { + "External id": 974247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936586926.343, "dur": 41.204, + "args": { + "External id": 974248,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936586979.047, "dur": 53.926, + "args": { + "External id": 974249,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936587109.581, "dur": 35.529, + "args": { + "External id": 974250,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936587172.635, "dur": 18.910, + "args": { + "External id": 974251,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936587218.039, "dur": 156.293, + "args": { + "External id": 974252,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936587279.548, "dur": 13.897, + "args": { + "External id": 974253,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936587286.430, "dur": 6.190, + "args": { + "External id": 974254,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936587296.846, "dur": 4.772, + "args": { + "External id": 974255,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936587303.514, "dur": 2.163, + "args": { + "External id": 974256,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936587309.177, "dur": 5.609, + "args": { + "External id": 974257,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936587387.903, "dur": 61.838, + "args": { + "External id": 974258,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936587485.016, "dur": 33.232, + "args": { + "External id": 974259,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936587530.778, "dur": 50.245, + "args": { + "External id": 974260,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936587591.003, "dur": 39.622, + "args": { + "External id": 974261,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936587657.955, "dur": 31.265, + "args": { + "External id": 974262,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936587698.911, "dur": 41.298, + "args": { + "External id": 974263,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936587760.867, "dur": 21.238, + "args": { + "External id": 974264,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13308 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2338708, "tid": 2338708, + "ts": 6345936587960.096, "dur": 150.794, + "args": { + "External id": 974265,"Record function id": 0, "Ev Idx": 13309 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936588205.488, "dur": 57.068, + "args": { + "External id": 974266,"Record function id": 0, "Ev Idx": 13310 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2338708, "tid": 2338708, + "ts": 6345936588273.459, "dur": 30085.217, + "args": { + "External id": 974267,"Record function id": 0, "Ev Idx": 13311 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338708, "tid": 2338708, + "ts": 6345936588281.952, "dur": 1142.598, + "args": { + "External id": 974268,"Record function id": 0, "Ev Idx": 13312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936588376.235, "dur": 11.103, + "args": { + "External id": 974269,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936588403.358, "dur": 48.583, + "args": { + "External id": 974270,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588410.421, "dur": 2.750, + "args": { + "External id": 974271,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588418.570, "dur": 0.588, + "args": { + "External id": 974272,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588421.070, "dur": 0.731, + "args": { + "External id": 974273,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588423.661, "dur": 0.943, + "args": { + "External id": 974274,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588428.473, "dur": 0.459, + "args": { + "External id": 974275,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588430.831, "dur": 0.938, + "args": { + "External id": 974276,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588434.011, "dur": 4.964, + "args": { + "External id": 974277,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588441.196, "dur": 0.548, + "args": { + "External id": 974278,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588443.307, "dur": 0.511, + "args": { + "External id": 974279,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936588468.012, "dur": 67.485, + "args": { + "External id": 974280,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936588574.485, "dur": 146.611, + "args": { + "External id": 974281,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936588587.005, "dur": 7.221, + "args": { + "External id": 974282,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936588602.992, "dur": 12.848, + "args": { + "External id": 974283,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936588608.230, "dur": 6.754, + "args": { + "External id": 974284,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588612.766, "dur": 0.839, + "args": { + "External id": 974285,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936588624.885, "dur": 35.560, + "args": { + "External id": 974286,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588628.224, "dur": 0.639, + "args": { + "External id": 974287,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588630.997, "dur": 2.918, + "args": { + "External id": 974288,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588635.925, "dur": 0.426, + "args": { + "External id": 974289,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588638.309, "dur": 2.679, + "args": { + "External id": 974290,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588644.865, "dur": 0.413, + "args": { + "External id": 974291,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588647.225, "dur": 0.298, + "args": { + "External id": 974292,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588649.114, "dur": 0.331, + "args": { + "External id": 974293,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588653.558, "dur": 0.373, + "args": { + "External id": 974294,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936588655.445, "dur": 0.289, + "args": { + "External id": 974295,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936588673.011, "dur": 38.743, + "args": { + "External id": 974296,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936588784.805, "dur": 511.818, + "args": { + "External id": 974297,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936588821.151, "dur": 468.296, + "args": { + "External id": 974298,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13342, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936588833.243, "dur": 448.334, + "args": { + "External id": 974299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936589327.906, "dur": 2.895, + "args": { + "External id": 974300,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13344, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338708, "tid": 2338708, + "ts": 6345936589449.646, "dur": 28687.822, + "args": { + "External id": 974301,"Record function id": 0, "Ev Idx": 13345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936589568.089, "dur": 8.023, + "args": { + "External id": 974302,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936589580.310, "dur": 1.326, + "args": { + "External id": 974303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936589584.054, "dur": 3.303, + "args": { + "External id": 974304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936589589.179, "dur": 0.924, + "args": { + "External id": 974305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936589591.835, "dur": 1.291, + "args": { + "External id": 974306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936589594.422, "dur": 0.838, + "args": { + "External id": 974307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936589599.911, "dur": 0.866, + "args": { + "External id": 974308,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936589602.520, "dur": 2.277, + "args": { + "External id": 974309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936589606.910, "dur": 1.036, + "args": { + "External id": 974310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936589609.788, "dur": 0.893, + "args": { + "External id": 974311,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936589635.260, "dur": 28451.082, + "args": { + "External id": 974312,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936589654.691, "dur": 28395.808, + "args": { + "External id": 974313,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936589679.285, "dur": 19.970, + "args": { + "External id": 974314,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936589703.922, "dur": 28293.340, + "args": { + "External id": 974315,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936589707.114, "dur": 28289.414, + "args": { + "External id": 974316,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936589714.248, "dur": 5.794, + "args": { + "External id": 974317,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936589722.335, "dur": 28270.708, + "args": { + "External id": 974318,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936618291.217, "dur": 37.624, + "args": { + "External id": 974319,"Sequence number": 10552275, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13363 + } + }, + { + "ph": "s", "id": 191, "pid": 2338708, "tid": 2338708, "ts": 6345936618291.217, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936618312.732, "dur": 10.597, + "args": { + "External id": 974320,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936618317.763, "dur": 5.280, + "args": { + "External id": 974321,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936618402.368, "dur": 82.043, + "args": { + "External id": 974322,"Record function id": 0, "Ev Idx": 13366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936618486.353, "dur": 1316.150, + "args": { + "External id": 974323,"Record function id": 0, "Ev Idx": 13367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936618533.041, "dur": 1252.770, + "args": { + "External id": 974324,"Sequence number": 10552276, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13368 + } + }, + { + "ph": "s", "id": 190, "pid": 2338708, "tid": 2338708, "ts": 6345936618533.041, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936618611.302, "dur": 56.811, + "args": { + "External id": 974325,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936618683.219, "dur": 117.287, + "args": { + "External id": 974326,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936618813.441, "dur": 42.167, + "args": { + "External id": 974327,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936618867.220, "dur": 33.305, + "args": { + "External id": 974328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936618936.818, "dur": 33.911, + "args": { + "External id": 974329,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936618991.871, "dur": 41.879, + "args": { + "External id": 974330,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936619102.522, "dur": 161.617, + "args": { + "External id": 974331,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936619164.393, "dur": 15.179, + "args": { + "External id": 974332,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936619170.939, "dur": 7.447, + "args": { + "External id": 974333,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936619183.326, "dur": 5.032, + "args": { + "External id": 974334,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936619190.052, "dur": 1.508, + "args": { + "External id": 974335,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936619194.619, "dur": 6.708, + "args": { + "External id": 974336,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936619300.931, "dur": 68.125, + "args": { + "External id": 974337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936619413.552, "dur": 35.416, + "args": { + "External id": 974338,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936619460.880, "dur": 47.432, + "args": { + "External id": 974339,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936619520.052, "dur": 39.406, + "args": { + "External id": 974340,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936619584.595, "dur": 27.281, + "args": { + "External id": 974341,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936619618.499, "dur": 39.268, + "args": { + "External id": 974342,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936619683.533, "dur": 19.530, + "args": { + "External id": 974343,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2338708, "tid": 2338708, + "ts": 6345936619880.681, "dur": 90.742, + "args": { + "External id": 974344,"Record function id": 0, "Ev Idx": 13388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936620124.735, "dur": 60.483, + "args": { + "External id": 974345,"Record function id": 0, "Ev Idx": 13389 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2338708, "tid": 2338708, + "ts": 6345936620196.222, "dur": 30545.584, + "args": { + "External id": 974346,"Record function id": 0, "Ev Idx": 13390 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338708, "tid": 2338708, + "ts": 6345936620205.972, "dur": 1099.326, + "args": { + "External id": 974347,"Record function id": 0, "Ev Idx": 13391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936620301.802, "dur": 11.227, + "args": { + "External id": 974348,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936620329.207, "dur": 45.902, + "args": { + "External id": 974349,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620336.218, "dur": 2.751, + "args": { + "External id": 974350,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620343.769, "dur": 0.569, + "args": { + "External id": 974351,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620346.206, "dur": 0.463, + "args": { + "External id": 974352,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620348.289, "dur": 0.781, + "args": { + "External id": 974353,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620352.615, "dur": 0.454, + "args": { + "External id": 974354,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620354.873, "dur": 0.952, + "args": { + "External id": 974355,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620357.620, "dur": 5.396, + "args": { + "External id": 974356,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620364.994, "dur": 0.361, + "args": { + "External id": 974357,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620367.333, "dur": 0.406, + "args": { + "External id": 974358,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936620389.262, "dur": 65.168, + "args": { + "External id": 974359,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936620498.016, "dur": 145.113, + "args": { + "External id": 974360,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936620510.544, "dur": 4.867, + "args": { + "External id": 974361,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936620522.124, "dur": 12.616, + "args": { + "External id": 974362,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936620527.386, "dur": 6.894, + "args": { + "External id": 974363,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620531.838, "dur": 0.685, + "args": { + "External id": 974364,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936620542.749, "dur": 39.087, + "args": { + "External id": 974365,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620546.002, "dur": 2.703, + "args": { + "External id": 974366,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620550.799, "dur": 0.568, + "args": { + "External id": 974367,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620553.149, "dur": 0.486, + "args": { + "External id": 974368,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620558.127, "dur": 2.525, + "args": { + "External id": 974369,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620562.452, "dur": 0.518, + "args": { + "External id": 974370,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620564.769, "dur": 0.380, + "args": { + "External id": 974371,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620568.981, "dur": 0.388, + "args": { + "External id": 974372,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620571.096, "dur": 0.407, + "args": { + "External id": 974373,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936620573.562, "dur": 3.457, + "args": { + "External id": 974374,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936620596.909, "dur": 36.921, + "args": { + "External id": 974375,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936620706.202, "dur": 474.996, + "args": { + "External id": 974376,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936620742.388, "dur": 431.697, + "args": { + "External id": 974377,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13421, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936620754.142, "dur": 412.849, + "args": { + "External id": 974378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936621212.694, "dur": 3.016, + "args": { + "External id": 974379,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13423, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338708, "tid": 2338708, + "ts": 6345936621330.606, "dur": 29190.943, + "args": { + "External id": 974380,"Record function id": 0, "Ev Idx": 13424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936621448.492, "dur": 8.098, + "args": { + "External id": 974381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936621460.906, "dur": 0.939, + "args": { + "External id": 974382,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936621463.770, "dur": 3.504, + "args": { + "External id": 974383,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936621469.143, "dur": 0.904, + "args": { + "External id": 974384,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936621471.830, "dur": 0.892, + "args": { + "External id": 974385,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936621474.342, "dur": 0.997, + "args": { + "External id": 974386,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936621479.110, "dur": 0.914, + "args": { + "External id": 974387,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936621481.858, "dur": 2.490, + "args": { + "External id": 974388,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936621486.059, "dur": 0.582, + "args": { + "External id": 974389,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936621488.472, "dur": 0.732, + "args": { + "External id": 974390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936621513.679, "dur": 28959.422, + "args": { + "External id": 974391,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936621533.364, "dur": 28930.882, + "args": { + "External id": 974392,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936621559.833, "dur": 17.598, + "args": { + "External id": 974393,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936621581.282, "dur": 28845.708, + "args": { + "External id": 974394,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936621584.668, "dur": 28841.547, + "args": { + "External id": 974395,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936621592.085, "dur": 6.549, + "args": { + "External id": 974396,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936621600.642, "dur": 28822.086, + "args": { + "External id": 974397,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936650676.900, "dur": 34.952, + "args": { + "External id": 974398,"Sequence number": 10552277, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13442 + } + }, + { + "ph": "s", "id": 189, "pid": 2338708, "tid": 2338708, "ts": 6345936650676.900, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936650696.115, "dur": 10.106, + "args": { + "External id": 974399,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936650700.880, "dur": 5.100, + "args": { + "External id": 974400,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936650788.572, "dur": 81.536, + "args": { + "External id": 974401,"Record function id": 0, "Ev Idx": 13445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936650871.786, "dur": 1328.990, + "args": { + "External id": 974402,"Record function id": 0, "Ev Idx": 13446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936650915.574, "dur": 1267.763, + "args": { + "External id": 974403,"Sequence number": 10552278, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13447 + } + }, + { + "ph": "s", "id": 188, "pid": 2338708, "tid": 2338708, "ts": 6345936650915.574, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936650990.045, "dur": 109.242, + "args": { + "External id": 974404,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936651119.065, "dur": 117.874, + "args": { + "External id": 974405,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936651250.409, "dur": 41.274, + "args": { + "External id": 974406,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936651303.310, "dur": 34.268, + "args": { + "External id": 974407,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936651368.276, "dur": 29.360, + "args": { + "External id": 974408,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936651420.792, "dur": 20.472, + "args": { + "External id": 974409,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936651467.717, "dur": 156.516, + "args": { + "External id": 974410,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936651530.007, "dur": 14.489, + "args": { + "External id": 974411,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936651537.046, "dur": 6.305, + "args": { + "External id": 974412,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936651547.794, "dur": 4.312, + "args": { + "External id": 974413,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936651553.478, "dur": 1.432, + "args": { + "External id": 974414,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936651557.798, "dur": 5.735, + "args": { + "External id": 974415,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936651637.104, "dur": 53.832, + "args": { + "External id": 974416,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936651727.343, "dur": 33.718, + "args": { + "External id": 974417,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936651772.731, "dur": 48.027, + "args": { + "External id": 974418,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936651831.217, "dur": 39.315, + "args": { + "External id": 974419,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936651898.028, "dur": 31.166, + "args": { + "External id": 974420,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936651937.536, "dur": 49.767, + "args": { + "External id": 974421,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936652025.925, "dur": 24.922, + "args": { + "External id": 974422,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13466 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2338708, "tid": 2338708, + "ts": 6345936652280.909, "dur": 88.825, + "args": { + "External id": 974423,"Record function id": 0, "Ev Idx": 13467 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936652458.357, "dur": 54.230, + "args": { + "External id": 974424,"Record function id": 0, "Ev Idx": 13468 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2338708, "tid": 2338708, + "ts": 6345936652523.481, "dur": 32886.884, + "args": { + "External id": 974425,"Record function id": 0, "Ev Idx": 13469 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338708, "tid": 2338708, + "ts": 6345936652532.824, "dur": 1075.301, + "args": { + "External id": 974426,"Record function id": 0, "Ev Idx": 13470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936652622.866, "dur": 10.168, + "args": { + "External id": 974427,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936652648.451, "dur": 46.517, + "args": { + "External id": 974428,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652655.001, "dur": 2.795, + "args": { + "External id": 974429,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652663.235, "dur": 0.626, + "args": { + "External id": 974430,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652667.108, "dur": 0.673, + "args": { + "External id": 974431,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652670.086, "dur": 0.444, + "args": { + "External id": 974432,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652673.870, "dur": 0.717, + "args": { + "External id": 974433,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652676.381, "dur": 0.954, + "args": { + "External id": 974434,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652678.890, "dur": 4.647, + "args": { + "External id": 974435,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652685.446, "dur": 0.401, + "args": { + "External id": 974436,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652687.395, "dur": 0.346, + "args": { + "External id": 974437,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936652709.792, "dur": 64.040, + "args": { + "External id": 974438,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936652811.497, "dur": 145.326, + "args": { + "External id": 974439,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936652829.221, "dur": 6.933, + "args": { + "External id": 974440,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936652842.876, "dur": 12.439, + "args": { + "External id": 974441,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936652848.155, "dur": 6.714, + "args": { + "External id": 974442,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652852.611, "dur": 0.841, + "args": { + "External id": 974443,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936652863.881, "dur": 33.340, + "args": { + "External id": 974444,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652866.489, "dur": 0.618, + "args": { + "External id": 974445,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652869.415, "dur": 2.222, + "args": { + "External id": 974446,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652873.268, "dur": 0.692, + "args": { + "External id": 974447,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652875.338, "dur": 2.792, + "args": { + "External id": 974448,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652881.374, "dur": 0.346, + "args": { + "External id": 974449,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652883.688, "dur": 0.596, + "args": { + "External id": 974450,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652885.756, "dur": 0.711, + "args": { + "External id": 974451,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652890.137, "dur": 0.368, + "args": { + "External id": 974452,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936652892.039, "dur": 0.413, + "args": { + "External id": 974453,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936652909.447, "dur": 38.135, + "args": { + "External id": 974454,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936653039.962, "dur": 452.075, + "args": { + "External id": 974455,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936653117.743, "dur": 368.027, + "args": { + "External id": 974456,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13500, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936653131.233, "dur": 348.350, + "args": { + "External id": 974457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936653521.112, "dur": 2.752, + "args": { + "External id": 974458,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13502, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338708, "tid": 2338708, + "ts": 6345936653634.077, "dur": 31547.417, + "args": { + "External id": 974459,"Record function id": 0, "Ev Idx": 13503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936653749.985, "dur": 7.586, + "args": { + "External id": 974460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936653761.851, "dur": 0.970, + "args": { + "External id": 974461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936653765.399, "dur": 3.421, + "args": { + "External id": 974462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936653770.742, "dur": 0.678, + "args": { + "External id": 974463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936653773.360, "dur": 1.006, + "args": { + "External id": 974464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936653775.744, "dur": 0.957, + "args": { + "External id": 974465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936653778.804, "dur": 0.975, + "args": { + "External id": 974466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936653781.789, "dur": 2.305, + "args": { + "External id": 974467,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936653786.231, "dur": 0.862, + "args": { + "External id": 974468,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936653790.836, "dur": 0.561, + "args": { + "External id": 974469,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936653813.027, "dur": 31317.055, + "args": { + "External id": 974470,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936653831.358, "dur": 31288.511, + "args": { + "External id": 974471,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936653853.369, "dur": 17.753, + "args": { + "External id": 974472,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936653875.238, "dur": 31174.777, + "args": { + "External id": 974473,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936653878.098, "dur": 31171.094, + "args": { + "External id": 974474,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936653885.406, "dur": 6.336, + "args": { + "External id": 974475,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936653893.900, "dur": 31151.663, + "args": { + "External id": 974476,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936685341.846, "dur": 37.001, + "args": { + "External id": 974477,"Sequence number": 10552279, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13521 + } + }, + { + "ph": "s", "id": 187, "pid": 2338708, "tid": 2338708, "ts": 6345936685341.846, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936685362.776, "dur": 10.032, + "args": { + "External id": 974478,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936685367.220, "dur": 5.289, + "args": { + "External id": 974479,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936685457.270, "dur": 80.419, + "args": { + "External id": 974480,"Record function id": 0, "Ev Idx": 13524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936685539.447, "dur": 1288.416, + "args": { + "External id": 974481,"Record function id": 0, "Ev Idx": 13525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936685583.040, "dur": 1228.615, + "args": { + "External id": 974482,"Sequence number": 10552280, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13526 + } + }, + { + "ph": "s", "id": 186, "pid": 2338708, "tid": 2338708, "ts": 6345936685583.040, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936685666.315, "dur": 53.723, + "args": { + "External id": 974483,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936685737.089, "dur": 115.909, + "args": { + "External id": 974484,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936685866.162, "dur": 42.113, + "args": { + "External id": 974485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936685915.124, "dur": 31.907, + "args": { + "External id": 974486,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936685978.383, "dur": 49.937, + "args": { + "External id": 974487,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936686094.090, "dur": 28.904, + "args": { + "External id": 974488,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936686150.939, "dur": 159.675, + "args": { + "External id": 974489,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936686211.574, "dur": 15.333, + "args": { + "External id": 974490,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936686218.656, "dur": 7.209, + "args": { + "External id": 974491,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936686230.256, "dur": 5.032, + "args": { + "External id": 974492,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936686237.214, "dur": 3.160, + "args": { + "External id": 974493,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936686243.700, "dur": 5.228, + "args": { + "External id": 974494,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936686324.444, "dur": 65.780, + "args": { + "External id": 974495,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936686427.724, "dur": 35.083, + "args": { + "External id": 974496,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936686474.304, "dur": 49.851, + "args": { + "External id": 974497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936686533.019, "dur": 40.166, + "args": { + "External id": 974498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936686599.945, "dur": 29.071, + "args": { + "External id": 974499,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936686635.781, "dur": 42.029, + "args": { + "External id": 974500,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936686700.874, "dur": 25.905, + "args": { + "External id": 974501,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13545 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2338708, "tid": 2338708, + "ts": 6345936686902.732, "dur": 86.667, + "args": { + "External id": 974502,"Record function id": 0, "Ev Idx": 13546 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936687144.183, "dur": 57.930, + "args": { + "External id": 974503,"Record function id": 0, "Ev Idx": 13547 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2338708, "tid": 2338708, + "ts": 6345936687214.335, "dur": 31084.397, + "args": { + "External id": 974504,"Record function id": 0, "Ev Idx": 13548 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338708, "tid": 2338708, + "ts": 6345936687224.148, "dur": 1083.749, + "args": { + "External id": 974505,"Record function id": 0, "Ev Idx": 13549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936687317.334, "dur": 10.940, + "args": { + "External id": 974506,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936687343.436, "dur": 45.753, + "args": { + "External id": 974507,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687350.302, "dur": 2.925, + "args": { + "External id": 974508,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687358.682, "dur": 0.364, + "args": { + "External id": 974509,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687361.302, "dur": 0.547, + "args": { + "External id": 974510,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687363.650, "dur": 0.588, + "args": { + "External id": 974511,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687368.066, "dur": 0.446, + "args": { + "External id": 974512,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687370.075, "dur": 0.900, + "args": { + "External id": 974513,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687372.914, "dur": 4.789, + "args": { + "External id": 974514,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687379.176, "dur": 0.634, + "args": { + "External id": 974515,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687381.585, "dur": 0.516, + "args": { + "External id": 974516,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936687402.178, "dur": 63.857, + "args": { + "External id": 974517,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936687505.065, "dur": 145.298, + "args": { + "External id": 974518,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936687517.934, "dur": 4.786, + "args": { + "External id": 974519,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936687529.473, "dur": 12.114, + "args": { + "External id": 974520,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936687534.684, "dur": 6.455, + "args": { + "External id": 974521,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687538.948, "dur": 0.642, + "args": { + "External id": 974522,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936687550.122, "dur": 37.831, + "args": { + "External id": 974523,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687553.238, "dur": 2.921, + "args": { + "External id": 974524,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687558.435, "dur": 0.333, + "args": { + "External id": 974525,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687560.670, "dur": 0.276, + "args": { + "External id": 974526,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687564.965, "dur": 3.080, + "args": { + "External id": 974527,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687570.055, "dur": 0.586, + "args": { + "External id": 974528,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687572.445, "dur": 0.641, + "args": { + "External id": 974529,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687576.568, "dur": 0.320, + "args": { + "External id": 974530,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687578.461, "dur": 0.450, + "args": { + "External id": 974531,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936687580.829, "dur": 2.386, + "args": { + "External id": 974532,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936687601.696, "dur": 39.859, + "args": { + "External id": 974533,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936687711.443, "dur": 474.523, + "args": { + "External id": 974534,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936687747.306, "dur": 432.349, + "args": { + "External id": 974535,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13579, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936687758.969, "dur": 413.094, + "args": { + "External id": 974536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936688217.439, "dur": 3.012, + "args": { + "External id": 974537,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13581, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338708, "tid": 2338708, + "ts": 6345936688333.464, "dur": 29708.663, + "args": { + "External id": 974538,"Record function id": 0, "Ev Idx": 13582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936688450.740, "dur": 7.515, + "args": { + "External id": 974539,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936688477.300, "dur": 1.449, + "args": { + "External id": 974540,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936688480.760, "dur": 2.831, + "args": { + "External id": 974541,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936688487.891, "dur": 0.713, + "args": { + "External id": 974542,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936688490.009, "dur": 0.891, + "args": { + "External id": 974543,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936688492.272, "dur": 1.073, + "args": { + "External id": 974544,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936688494.936, "dur": 0.938, + "args": { + "External id": 974545,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936688500.050, "dur": 2.679, + "args": { + "External id": 974546,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936688504.591, "dur": 0.866, + "args": { + "External id": 974547,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936688507.667, "dur": 0.633, + "args": { + "External id": 974548,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936688532.867, "dur": 29447.740, + "args": { + "External id": 974549,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936688551.964, "dur": 29420.590, + "args": { + "External id": 974550,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936688568.460, "dur": 18.040, + "args": { + "External id": 974551,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936688593.181, "dur": 29338.530, + "args": { + "External id": 974552,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936688596.291, "dur": 29334.716, + "args": { + "External id": 974553,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936688603.457, "dur": 6.308, + "args": { + "External id": 974554,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936688611.841, "dur": 29315.645, + "args": { + "External id": 974555,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936718228.067, "dur": 38.975, + "args": { + "External id": 974556,"Sequence number": 10552281, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13600 + } + }, + { + "ph": "s", "id": 185, "pid": 2338708, "tid": 2338708, "ts": 6345936718228.067, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936718250.676, "dur": 10.070, + "args": { + "External id": 974557,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936718255.105, "dur": 5.130, + "args": { + "External id": 974558,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936718345.179, "dur": 83.770, + "args": { + "External id": 974559,"Record function id": 0, "Ev Idx": 13603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936718430.835, "dur": 1293.841, + "args": { + "External id": 974560,"Record function id": 0, "Ev Idx": 13604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936718474.196, "dur": 1233.859, + "args": { + "External id": 974561,"Sequence number": 10552282, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13605 + } + }, + { + "ph": "s", "id": 184, "pid": 2338708, "tid": 2338708, "ts": 6345936718474.196, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936718554.423, "dur": 57.153, + "args": { + "External id": 974562,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936718625.669, "dur": 120.122, + "args": { + "External id": 974563,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936718759.309, "dur": 42.672, + "args": { + "External id": 974564,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936718811.796, "dur": 34.175, + "args": { + "External id": 974565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936718873.976, "dur": 31.327, + "args": { + "External id": 974566,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936718930.626, "dur": 21.556, + "args": { + "External id": 974567,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936718977.304, "dur": 220.853, + "args": { + "External id": 974568,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936719094.901, "dur": 16.274, + "args": { + "External id": 974569,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936719102.488, "dur": 7.430, + "args": { + "External id": 974570,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936719114.482, "dur": 4.509, + "args": { + "External id": 974571,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936719120.476, "dur": 1.547, + "args": { + "External id": 974572,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936719124.998, "dur": 5.430, + "args": { + "External id": 974573,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936719212.816, "dur": 64.431, + "args": { + "External id": 974574,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936719316.088, "dur": 37.034, + "args": { + "External id": 974575,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936719365.291, "dur": 49.130, + "args": { + "External id": 974576,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936719426.658, "dur": 39.441, + "args": { + "External id": 974577,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936719492.470, "dur": 33.483, + "args": { + "External id": 974578,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936719536.028, "dur": 40.918, + "args": { + "External id": 974579,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936719600.228, "dur": 20.778, + "args": { + "External id": 974580,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13624 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2338708, "tid": 2338708, + "ts": 6345936719803.488, "dur": 91.972, + "args": { + "External id": 974581,"Record function id": 0, "Ev Idx": 13625 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936719984.247, "dur": 120.636, + "args": { + "External id": 974582,"Record function id": 0, "Ev Idx": 13626 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2338708, "tid": 2338708, + "ts": 6345936720119.163, "dur": 32184.450, + "args": { + "External id": 974583,"Record function id": 0, "Ev Idx": 13627 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338708, "tid": 2338708, + "ts": 6345936720130.460, "dur": 1091.165, + "args": { + "External id": 974584,"Record function id": 0, "Ev Idx": 13628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936720225.842, "dur": 10.872, + "args": { + "External id": 974585,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936720253.322, "dur": 46.198, + "args": { + "External id": 974586,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720260.303, "dur": 2.834, + "args": { + "External id": 974587,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720268.622, "dur": 0.572, + "args": { + "External id": 974588,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720271.155, "dur": 0.439, + "args": { + "External id": 974589,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720273.802, "dur": 0.686, + "args": { + "External id": 974590,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720278.008, "dur": 0.731, + "args": { + "External id": 974591,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720280.475, "dur": 0.429, + "args": { + "External id": 974592,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720283.029, "dur": 5.029, + "args": { + "External id": 974593,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720289.800, "dur": 0.447, + "args": { + "External id": 974594,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720291.791, "dur": 0.684, + "args": { + "External id": 974595,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936720313.509, "dur": 65.779, + "args": { + "External id": 974596,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936720419.820, "dur": 145.449, + "args": { + "External id": 974597,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936720432.138, "dur": 5.038, + "args": { + "External id": 974598,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936720444.070, "dur": 12.383, + "args": { + "External id": 974599,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936720449.284, "dur": 6.690, + "args": { + "External id": 974600,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720453.555, "dur": 1.008, + "args": { + "External id": 974601,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936720466.369, "dur": 38.702, + "args": { + "External id": 974602,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720469.138, "dur": 2.637, + "args": { + "External id": 974603,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720473.665, "dur": 0.464, + "args": { + "External id": 974604,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720480.156, "dur": 0.295, + "args": { + "External id": 974605,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720484.013, "dur": 2.355, + "args": { + "External id": 974606,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720488.003, "dur": 1.019, + "args": { + "External id": 974607,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720490.742, "dur": 1.952, + "args": { + "External id": 974608,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720494.102, "dur": 0.563, + "args": { + "External id": 974609,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720496.367, "dur": 0.777, + "args": { + "External id": 974610,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936720500.317, "dur": 0.373, + "args": { + "External id": 974611,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936720516.569, "dur": 39.514, + "args": { + "External id": 974612,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936720628.881, "dur": 463.703, + "args": { + "External id": 974613,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936720664.876, "dur": 385.084, + "args": { + "External id": 974614,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13658, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936720676.910, "dur": 365.638, + "args": { + "External id": 974615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936721125.904, "dur": 4.215, + "args": { + "External id": 974616,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13660, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338708, "tid": 2338708, + "ts": 6345936721246.813, "dur": 30796.514, + "args": { + "External id": 974617,"Record function id": 0, "Ev Idx": 13661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936721365.187, "dur": 7.819, + "args": { + "External id": 974618,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936721377.394, "dur": 1.036, + "args": { + "External id": 974619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936721380.758, "dur": 3.488, + "args": { + "External id": 974620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936721386.294, "dur": 0.944, + "args": { + "External id": 974621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936721388.653, "dur": 0.836, + "args": { + "External id": 974622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936721390.994, "dur": 0.780, + "args": { + "External id": 974623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936721393.992, "dur": 0.853, + "args": { + "External id": 974624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936721397.064, "dur": 2.294, + "args": { + "External id": 974625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936721401.086, "dur": 0.842, + "args": { + "External id": 974626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936721406.264, "dur": 0.984, + "args": { + "External id": 974627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936721428.394, "dur": 30553.806, + "args": { + "External id": 974628,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936721446.456, "dur": 30527.189, + "args": { + "External id": 974629,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936721468.190, "dur": 20.214, + "args": { + "External id": 974630,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936721492.558, "dur": 30441.316, + "args": { + "External id": 974631,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936721495.649, "dur": 30437.467, + "args": { + "External id": 974632,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936721502.656, "dur": 6.478, + "args": { + "External id": 974633,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936721511.172, "dur": 30418.430, + "args": { + "External id": 974634,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936752231.026, "dur": 41.533, + "args": { + "External id": 974635,"Sequence number": 10552283, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13679 + } + }, + { + "ph": "s", "id": 183, "pid": 2338708, "tid": 2338708, "ts": 6345936752231.026, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936752255.604, "dur": 10.648, + "args": { + "External id": 974636,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936752260.525, "dur": 5.345, + "args": { + "External id": 974637,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936752349.451, "dur": 79.647, + "args": { + "External id": 974638,"Record function id": 0, "Ev Idx": 13682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936752430.944, "dur": 1262.901, + "args": { + "External id": 974639,"Record function id": 0, "Ev Idx": 13683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936752472.650, "dur": 1204.506, + "args": { + "External id": 974640,"Sequence number": 10552284, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13684 + } + }, + { + "ph": "s", "id": 182, "pid": 2338708, "tid": 2338708, "ts": 6345936752472.650, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936752549.909, "dur": 58.774, + "args": { + "External id": 974641,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936752622.756, "dur": 117.919, + "args": { + "External id": 974642,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936752757.388, "dur": 39.837, + "args": { + "External id": 974643,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936752805.159, "dur": 32.562, + "args": { + "External id": 974644,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936752868.813, "dur": 30.808, + "args": { + "External id": 974645,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936752922.298, "dur": 18.109, + "args": { + "External id": 974646,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936752964.407, "dur": 221.890, + "args": { + "External id": 974647,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936753043.032, "dur": 53.886, + "args": { + "External id": 974648,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936753050.258, "dur": 45.371, + "args": { + "External id": 974649,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936753101.347, "dur": 5.389, + "args": { + "External id": 974650,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936753108.628, "dur": 1.513, + "args": { + "External id": 974651,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936753115.680, "dur": 5.586, + "args": { + "External id": 974652,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936753200.489, "dur": 64.627, + "args": { + "External id": 974653,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936753303.156, "dur": 34.724, + "args": { + "External id": 974654,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936753349.418, "dur": 47.766, + "args": { + "External id": 974655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936753405.920, "dur": 39.893, + "args": { + "External id": 974656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936753471.103, "dur": 30.960, + "args": { + "External id": 974657,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936753508.183, "dur": 40.408, + "args": { + "External id": 974658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936753570.791, "dur": 20.170, + "args": { + "External id": 974659,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13703 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2338708, "tid": 2338708, + "ts": 6345936753770.752, "dur": 87.339, + "args": { + "External id": 974660,"Record function id": 0, "Ev Idx": 13704 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936753948.373, "dur": 52.838, + "args": { + "External id": 974661,"Record function id": 0, "Ev Idx": 13705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2338708, "tid": 2338708, + "ts": 6345936754033.625, "dur": 32534.630, + "args": { + "External id": 974662,"Record function id": 0, "Ev Idx": 13706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338708, "tid": 2338708, + "ts": 6345936754043.941, "dur": 1207.070, + "args": { + "External id": 974663,"Record function id": 0, "Ev Idx": 13707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936754178.868, "dur": 12.270, + "args": { + "External id": 974664,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936754206.523, "dur": 44.434, + "args": { + "External id": 974665,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754213.665, "dur": 3.100, + "args": { + "External id": 974666,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754221.528, "dur": 0.586, + "args": { + "External id": 974667,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754224.030, "dur": 0.908, + "args": { + "External id": 974668,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754226.772, "dur": 0.340, + "args": { + "External id": 974669,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754230.532, "dur": 0.665, + "args": { + "External id": 974670,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754232.992, "dur": 0.716, + "args": { + "External id": 974671,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754235.332, "dur": 4.320, + "args": { + "External id": 974672,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754241.148, "dur": 0.632, + "args": { + "External id": 974673,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754243.670, "dur": 0.294, + "args": { + "External id": 974674,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936754263.985, "dur": 65.613, + "args": { + "External id": 974675,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936754370.261, "dur": 144.746, + "args": { + "External id": 974676,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936754382.880, "dur": 5.123, + "args": { + "External id": 974677,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936754394.222, "dur": 12.575, + "args": { + "External id": 974678,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936754399.522, "dur": 6.822, + "args": { + "External id": 974679,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754404.329, "dur": 0.535, + "args": { + "External id": 974680,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936754415.924, "dur": 35.044, + "args": { + "External id": 974681,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754418.621, "dur": 2.481, + "args": { + "External id": 974682,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754422.953, "dur": 0.683, + "args": { + "External id": 974683,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754425.065, "dur": 0.609, + "args": { + "External id": 974684,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754430.190, "dur": 2.836, + "args": { + "External id": 974685,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754434.913, "dur": 0.335, + "args": { + "External id": 974686,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754436.801, "dur": 0.431, + "args": { + "External id": 974687,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754440.312, "dur": 0.451, + "args": { + "External id": 974688,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754442.189, "dur": 0.448, + "args": { + "External id": 974689,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936754444.335, "dur": 2.000, + "args": { + "External id": 974690,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936754466.438, "dur": 38.886, + "args": { + "External id": 974691,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936754586.437, "dur": 532.775, + "args": { + "External id": 974692,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936754636.354, "dur": 475.443, + "args": { + "External id": 974693,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13737, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936754648.598, "dur": 454.964, + "args": { + "External id": 974694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936755153.097, "dur": 3.211, + "args": { + "External id": 974695,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13739, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338708, "tid": 2338708, + "ts": 6345936755279.043, "dur": 31067.265, + "args": { + "External id": 974696,"Record function id": 0, "Ev Idx": 13740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936755400.970, "dur": 8.405, + "args": { + "External id": 974697,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936755413.642, "dur": 1.053, + "args": { + "External id": 974698,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936755416.789, "dur": 3.800, + "args": { + "External id": 974699,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936755422.372, "dur": 0.801, + "args": { + "External id": 974700,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936755424.809, "dur": 0.785, + "args": { + "External id": 974701,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936755426.995, "dur": 1.022, + "args": { + "External id": 974702,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936755429.807, "dur": 0.782, + "args": { + "External id": 974703,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936755432.449, "dur": 2.392, + "args": { + "External id": 974704,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936755436.771, "dur": 0.982, + "args": { + "External id": 974705,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936755441.681, "dur": 0.821, + "args": { + "External id": 974706,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936755464.272, "dur": 30833.487, + "args": { + "External id": 974707,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936755484.078, "dur": 30805.180, + "args": { + "External id": 974708,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936755505.150, "dur": 18.563, + "args": { + "External id": 974709,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936755527.837, "dur": 30721.478, + "args": { + "External id": 974710,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936755530.981, "dur": 30717.571, + "args": { + "External id": 974711,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936755538.166, "dur": 6.082, + "args": { + "External id": 974712,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936755546.190, "dur": 30698.822, + "args": { + "External id": 974713,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936786503.875, "dur": 35.296, + "args": { + "External id": 974714,"Sequence number": 10552285, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13758 + } + }, + { + "ph": "s", "id": 181, "pid": 2338708, "tid": 2338708, "ts": 6345936786503.875, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936786523.651, "dur": 9.729, + "args": { + "External id": 974715,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936786528.134, "dur": 5.023, + "args": { + "External id": 974716,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936786617.727, "dur": 82.772, + "args": { + "External id": 974717,"Record function id": 0, "Ev Idx": 13761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936786702.164, "dur": 1271.339, + "args": { + "External id": 974718,"Record function id": 0, "Ev Idx": 13762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936786748.685, "dur": 1209.344, + "args": { + "External id": 974719,"Sequence number": 10552286, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13763 + } + }, + { + "ph": "s", "id": 180, "pid": 2338708, "tid": 2338708, "ts": 6345936786748.685, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936786826.029, "dur": 52.841, + "args": { + "External id": 974720,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936786893.835, "dur": 137.594, + "args": { + "External id": 974721,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936787049.488, "dur": 88.011, + "args": { + "External id": 974722,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936787151.781, "dur": 33.765, + "args": { + "External id": 974723,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936787219.725, "dur": 30.496, + "args": { + "External id": 974724,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936787273.055, "dur": 18.590, + "args": { + "External id": 974725,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936787318.374, "dur": 156.002, + "args": { + "External id": 974726,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936787380.085, "dur": 13.849, + "args": { + "External id": 974727,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936787386.823, "dur": 6.190, + "args": { + "External id": 974728,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936787397.402, "dur": 4.663, + "args": { + "External id": 974729,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936787403.666, "dur": 1.274, + "args": { + "External id": 974730,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936787407.865, "dur": 5.989, + "args": { + "External id": 974731,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936787487.024, "dur": 55.053, + "args": { + "External id": 974732,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936787577.609, "dur": 35.154, + "args": { + "External id": 974733,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936787624.627, "dur": 47.589, + "args": { + "External id": 974734,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936787683.753, "dur": 39.632, + "args": { + "External id": 974735,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936787750.178, "dur": 29.937, + "args": { + "External id": 974736,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936787789.001, "dur": 39.191, + "args": { + "External id": 974737,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936787848.229, "dur": 19.941, + "args": { + "External id": 974738,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13782 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2338708, "tid": 2338708, + "ts": 6345936788108.389, "dur": 91.389, + "args": { + "External id": 974739,"Record function id": 0, "Ev Idx": 13783 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936788291.243, "dur": 53.358, + "args": { + "External id": 974740,"Record function id": 0, "Ev Idx": 13784 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2338708, "tid": 2338708, + "ts": 6345936788355.443, "dur": 31780.192, + "args": { + "External id": 974741,"Record function id": 0, "Ev Idx": 13785 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338708, "tid": 2338708, + "ts": 6345936788365.738, "dur": 1078.767, + "args": { + "External id": 974742,"Record function id": 0, "Ev Idx": 13786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936788458.043, "dur": 10.578, + "args": { + "External id": 974743,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936788483.875, "dur": 43.081, + "args": { + "External id": 974744,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788490.543, "dur": 3.108, + "args": { + "External id": 974745,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788498.457, "dur": 0.566, + "args": { + "External id": 974746,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788500.596, "dur": 0.455, + "args": { + "External id": 974747,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788503.139, "dur": 0.544, + "args": { + "External id": 974748,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788506.996, "dur": 0.576, + "args": { + "External id": 974749,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788509.138, "dur": 0.422, + "args": { + "External id": 974750,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788511.310, "dur": 4.315, + "args": { + "External id": 974751,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788517.316, "dur": 0.485, + "args": { + "External id": 974752,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788519.362, "dur": 0.605, + "args": { + "External id": 974753,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936788540.125, "dur": 64.756, + "args": { + "External id": 974754,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936788645.871, "dur": 143.850, + "args": { + "External id": 974755,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936788658.774, "dur": 4.915, + "args": { + "External id": 974756,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936788669.792, "dur": 12.057, + "args": { + "External id": 974757,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936788674.887, "dur": 6.504, + "args": { + "External id": 974758,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788679.177, "dur": 0.708, + "args": { + "External id": 974759,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936788690.132, "dur": 39.349, + "args": { + "External id": 974760,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788693.184, "dur": 2.328, + "args": { + "External id": 974761,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788697.217, "dur": 0.354, + "args": { + "External id": 974762,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788699.701, "dur": 0.612, + "args": { + "External id": 974763,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788704.288, "dur": 2.931, + "args": { + "External id": 974764,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788708.555, "dur": 0.343, + "args": { + "External id": 974765,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788710.781, "dur": 0.352, + "args": { + "External id": 974766,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788714.907, "dur": 0.330, + "args": { + "External id": 974767,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788716.611, "dur": 0.342, + "args": { + "External id": 974768,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936788723.129, "dur": 1.838, + "args": { + "External id": 974769,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936788741.624, "dur": 38.506, + "args": { + "External id": 974770,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936788854.250, "dur": 466.611, + "args": { + "External id": 974771,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936788889.708, "dur": 424.568, + "args": { + "External id": 974772,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13816, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936788901.461, "dur": 403.833, + "args": { + "External id": 974773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936789353.784, "dur": 3.429, + "args": { + "External id": 974774,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13818, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338708, "tid": 2338708, + "ts": 6345936789469.855, "dur": 30397.674, + "args": { + "External id": 974775,"Record function id": 0, "Ev Idx": 13819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936789588.014, "dur": 7.706, + "args": { + "External id": 974776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936789599.949, "dur": 1.168, + "args": { + "External id": 974777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936789603.306, "dur": 3.870, + "args": { + "External id": 974778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936789608.977, "dur": 1.037, + "args": { + "External id": 974779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936789611.681, "dur": 1.132, + "args": { + "External id": 974780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936789614.273, "dur": 0.668, + "args": { + "External id": 974781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936789616.897, "dur": 0.836, + "args": { + "External id": 974782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936789619.751, "dur": 2.305, + "args": { + "External id": 974783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936789623.587, "dur": 0.888, + "args": { + "External id": 974784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936789628.215, "dur": 0.912, + "args": { + "External id": 974785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936789650.933, "dur": 30166.783, + "args": { + "External id": 974786,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936789676.651, "dur": 30132.349, + "args": { + "External id": 974787,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936789692.969, "dur": 17.796, + "args": { + "External id": 974788,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936789714.757, "dur": 30054.381, + "args": { + "External id": 974789,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936789717.721, "dur": 30050.726, + "args": { + "External id": 974790,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936789724.240, "dur": 6.176, + "args": { + "External id": 974791,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936789732.620, "dur": 30032.126, + "args": { + "External id": 974792,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936820034.298, "dur": 67.680, + "args": { + "External id": 974793,"Sequence number": 10552287, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13837 + } + }, + { + "ph": "s", "id": 179, "pid": 2338708, "tid": 2338708, "ts": 6345936820034.298, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936820082.063, "dur": 13.387, + "args": { + "External id": 974794,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936820089.244, "dur": 5.817, + "args": { + "External id": 974795,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936820183.519, "dur": 80.728, + "args": { + "External id": 974796,"Record function id": 0, "Ev Idx": 13840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936820265.846, "dur": 1274.284, + "args": { + "External id": 974797,"Record function id": 0, "Ev Idx": 13841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936820310.661, "dur": 1213.057, + "args": { + "External id": 974798,"Sequence number": 10552288, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13842 + } + }, + { + "ph": "s", "id": 178, "pid": 2338708, "tid": 2338708, "ts": 6345936820310.661, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936820390.474, "dur": 58.741, + "args": { + "External id": 974799,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936820464.386, "dur": 115.691, + "args": { + "External id": 974800,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936820593.175, "dur": 41.473, + "args": { + "External id": 974801,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936820643.841, "dur": 33.327, + "args": { + "External id": 974802,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936820707.844, "dur": 31.069, + "args": { + "External id": 974803,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936820762.913, "dur": 20.069, + "args": { + "External id": 974804,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936820808.838, "dur": 148.655, + "args": { + "External id": 974805,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936820866.444, "dur": 13.605, + "args": { + "External id": 974806,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936820873.026, "dur": 6.143, + "args": { + "External id": 974807,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936820883.343, "dur": 4.842, + "args": { + "External id": 974808,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936820889.551, "dur": 1.521, + "args": { + "External id": 974809,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936820893.835, "dur": 5.729, + "args": { + "External id": 974810,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936820969.796, "dur": 75.667, + "args": { + "External id": 974811,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936821124.752, "dur": 38.471, + "args": { + "External id": 974812,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936821175.438, "dur": 53.479, + "args": { + "External id": 974813,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936821241.060, "dur": 39.762, + "args": { + "External id": 974814,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936821309.167, "dur": 34.791, + "args": { + "External id": 974815,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936821353.318, "dur": 41.446, + "args": { + "External id": 974816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936821417.013, "dur": 20.405, + "args": { + "External id": 974817,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13861 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2338708, "tid": 2338708, + "ts": 6345936821619.161, "dur": 93.902, + "args": { + "External id": 974818,"Record function id": 0, "Ev Idx": 13862 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936821800.810, "dur": 56.156, + "args": { + "External id": 974819,"Record function id": 0, "Ev Idx": 13863 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2338708, "tid": 2338708, + "ts": 6345936821867.466, "dur": 31103.467, + "args": { + "External id": 974820,"Record function id": 0, "Ev Idx": 13864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338708, "tid": 2338708, + "ts": 6345936821878.580, "dur": 1098.142, + "args": { + "External id": 974821,"Record function id": 0, "Ev Idx": 13865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936821969.587, "dur": 9.465, + "args": { + "External id": 974822,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936821994.208, "dur": 105.592, + "args": { + "External id": 974823,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822000.784, "dur": 2.995, + "args": { + "External id": 974824,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822028.166, "dur": 0.938, + "args": { + "External id": 974825,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822032.565, "dur": 0.453, + "args": { + "External id": 974826,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822034.953, "dur": 0.643, + "args": { + "External id": 974827,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822039.239, "dur": 0.563, + "args": { + "External id": 974828,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822041.345, "dur": 0.446, + "args": { + "External id": 974829,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822043.699, "dur": 4.453, + "args": { + "External id": 974830,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822049.784, "dur": 0.377, + "args": { + "External id": 974831,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822051.667, "dur": 39.248, + "args": { + "External id": 974832,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936822114.653, "dur": 66.520, + "args": { + "External id": 974833,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936822224.066, "dur": 177.905, + "args": { + "External id": 974834,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936822238.429, "dur": 6.276, + "args": { + "External id": 974835,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936822251.015, "dur": 12.434, + "args": { + "External id": 974836,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936822256.194, "dur": 6.793, + "args": { + "External id": 974837,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822260.754, "dur": 0.766, + "args": { + "External id": 974838,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936822272.851, "dur": 35.960, + "args": { + "External id": 974839,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822275.950, "dur": 2.490, + "args": { + "External id": 974840,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822280.502, "dur": 0.438, + "args": { + "External id": 974841,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822283.019, "dur": 0.600, + "args": { + "External id": 974842,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822287.659, "dur": 2.558, + "args": { + "External id": 974843,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822292.089, "dur": 0.339, + "args": { + "External id": 974844,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822293.957, "dur": 0.323, + "args": { + "External id": 974845,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822298.038, "dur": 0.390, + "args": { + "External id": 974846,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822299.992, "dur": 0.288, + "args": { + "External id": 974847,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936822302.069, "dur": 2.089, + "args": { + "External id": 974848,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936822351.367, "dur": 41.005, + "args": { + "External id": 974849,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936822468.061, "dur": 400.588, + "args": { + "External id": 974850,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936822503.125, "dur": 360.215, + "args": { + "External id": 974851,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13895, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936822516.307, "dur": 340.666, + "args": { + "External id": 974852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936822896.937, "dur": 2.758, + "args": { + "External id": 974853,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13897, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338708, "tid": 2338708, + "ts": 6345936823000.343, "dur": 29750.222, + "args": { + "External id": 974854,"Record function id": 0, "Ev Idx": 13898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936823181.321, "dur": 8.272, + "args": { + "External id": 974855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936823193.963, "dur": 1.191, + "args": { + "External id": 974856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936823197.109, "dur": 3.499, + "args": { + "External id": 974857,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936823202.936, "dur": 0.786, + "args": { + "External id": 974858,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936823205.489, "dur": 0.824, + "args": { + "External id": 974859,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936823207.723, "dur": 1.042, + "args": { + "External id": 974860,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936823210.543, "dur": 0.741, + "args": { + "External id": 974861,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936823213.489, "dur": 2.441, + "args": { + "External id": 974862,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936823217.663, "dur": 1.026, + "args": { + "External id": 974863,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936823222.804, "dur": 0.703, + "args": { + "External id": 974864,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936823246.230, "dur": 29455.106, + "args": { + "External id": 974865,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936823264.810, "dur": 29427.988, + "args": { + "External id": 974866,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936823288.099, "dur": 17.919, + "args": { + "External id": 974867,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936823309.617, "dur": 29342.582, + "args": { + "External id": 974868,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936823312.618, "dur": 29338.931, + "args": { + "External id": 974869,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936823319.704, "dur": 6.309, + "args": { + "External id": 974870,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936823328.141, "dur": 29319.792, + "args": { + "External id": 974871,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936852903.840, "dur": 37.226, + "args": { + "External id": 974872,"Sequence number": 10552289, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13916 + } + }, + { + "ph": "s", "id": 177, "pid": 2338708, "tid": 2338708, "ts": 6345936852903.840, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936852926.114, "dur": 9.585, + "args": { + "External id": 974873,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936852930.645, "dur": 4.870, + "args": { + "External id": 974874,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936853028.914, "dur": 115.692, + "args": { + "External id": 974875,"Record function id": 0, "Ev Idx": 13919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936853147.867, "dur": 1281.472, + "args": { + "External id": 974876,"Record function id": 0, "Ev Idx": 13920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936853198.264, "dur": 1214.331, + "args": { + "External id": 974877,"Sequence number": 10552290, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 13921 + } + }, + { + "ph": "s", "id": 176, "pid": 2338708, "tid": 2338708, "ts": 6345936853198.264, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936853278.216, "dur": 56.103, + "args": { + "External id": 974878,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936853349.217, "dur": 114.319, + "args": { + "External id": 974879,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936853476.599, "dur": 39.270, + "args": { + "External id": 974880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936853525.121, "dur": 33.401, + "args": { + "External id": 974881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 13925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936853586.769, "dur": 30.130, + "args": { + "External id": 974882,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936853640.229, "dur": 20.633, + "args": { + "External id": 974883,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 13927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936853686.194, "dur": 155.052, + "args": { + "External id": 974884,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 13928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936853746.250, "dur": 14.408, + "args": { + "External id": 974885,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 13929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936853753.231, "dur": 6.429, + "args": { + "External id": 974886,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936853764.106, "dur": 4.361, + "args": { + "External id": 974887,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936853770.073, "dur": 1.400, + "args": { + "External id": 974888,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936853774.113, "dur": 5.772, + "args": { + "External id": 974889,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936853854.027, "dur": 56.140, + "args": { + "External id": 974890,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 13934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936853945.981, "dur": 34.608, + "args": { + "External id": 974891,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 13935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936853991.498, "dur": 109.451, + "args": { + "External id": 974892,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936854117.865, "dur": 46.852, + "args": { + "External id": 974893,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 13937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936854192.894, "dur": 35.541, + "args": { + "External id": 974894,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 13938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936854238.124, "dur": 41.550, + "args": { + "External id": 974895,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 13939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936854302.391, "dur": 20.333, + "args": { + "External id": 974896,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 13940 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2338708, "tid": 2338708, + "ts": 6345936854510.095, "dur": 92.097, + "args": { + "External id": 974897,"Record function id": 0, "Ev Idx": 13941 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936854690.506, "dur": 53.086, + "args": { + "External id": 974898,"Record function id": 0, "Ev Idx": 13942 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2338708, "tid": 2338708, + "ts": 6345936854754.799, "dur": 32731.680, + "args": { + "External id": 974899,"Record function id": 0, "Ev Idx": 13943 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338708, "tid": 2338708, + "ts": 6345936854765.553, "dur": 1080.691, + "args": { + "External id": 974900,"Record function id": 0, "Ev Idx": 13944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936854860.312, "dur": 9.535, + "args": { + "External id": 974901,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936854884.513, "dur": 41.884, + "args": { + "External id": 974902,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936854891.139, "dur": 2.588, + "args": { + "External id": 974903,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936854898.677, "dur": 0.483, + "args": { + "External id": 974904,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936854900.620, "dur": 0.550, + "args": { + "External id": 974905,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936854902.867, "dur": 0.745, + "args": { + "External id": 974906,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936854906.558, "dur": 0.526, + "args": { + "External id": 974907,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936854908.440, "dur": 0.597, + "args": { + "External id": 974908,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936854910.905, "dur": 4.027, + "args": { + "External id": 974909,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936854916.547, "dur": 0.516, + "args": { + "External id": 974910,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936854918.792, "dur": 0.645, + "args": { + "External id": 974911,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936854940.213, "dur": 59.235, + "args": { + "External id": 974912,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936855103.306, "dur": 153.366, + "args": { + "External id": 974913,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 13957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936855118.074, "dur": 7.923, + "args": { + "External id": 974914,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936855133.172, "dur": 13.452, + "args": { + "External id": 974915,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936855138.531, "dur": 7.622, + "args": { + "External id": 974916,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 13960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936855143.363, "dur": 0.834, + "args": { + "External id": 974917,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 13961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936855156.626, "dur": 33.556, + "args": { + "External id": 974918,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 13962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936855159.544, "dur": 2.161, + "args": { + "External id": 974919,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936855163.591, "dur": 0.562, + "args": { + "External id": 974920,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936855166.135, "dur": 0.492, + "args": { + "External id": 974921,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936855170.692, "dur": 3.198, + "args": { + "External id": 974922,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936855175.307, "dur": 0.345, + "args": { + "External id": 974923,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936855177.186, "dur": 0.427, + "args": { + "External id": 974924,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936855180.339, "dur": 0.296, + "args": { + "External id": 974925,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936855181.952, "dur": 0.452, + "args": { + "External id": 974926,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936855183.621, "dur": 2.129, + "args": { + "External id": 974927,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 13971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936855204.432, "dur": 42.332, + "args": { + "External id": 974928,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 13972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936855323.969, "dur": 414.337, + "args": { + "External id": 974929,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 13973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936855367.681, "dur": 364.866, + "args": { + "External id": 974930,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 13974, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936855379.848, "dur": 346.404, + "args": { + "External id": 974931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 13975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936855765.579, "dur": 2.809, + "args": { + "External id": 974932,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 13976, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338708, "tid": 2338708, + "ts": 6345936855870.430, "dur": 31391.950, + "args": { + "External id": 974933,"Record function id": 0, "Ev Idx": 13977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936855980.323, "dur": 6.876, + "args": { + "External id": 974934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 13978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936855997.408, "dur": 1.259, + "args": { + "External id": 974935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936856000.524, "dur": 4.010, + "args": { + "External id": 974936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936856029.700, "dur": 1.895, + "args": { + "External id": 974937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936856035.530, "dur": 0.814, + "args": { + "External id": 974938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 13982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936856037.768, "dur": 0.875, + "args": { + "External id": 974939,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 13983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936856040.392, "dur": 0.998, + "args": { + "External id": 974940,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 13984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936856045.269, "dur": 2.383, + "args": { + "External id": 974941,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936856049.454, "dur": 0.799, + "args": { + "External id": 974942,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936856087.784, "dur": 2.044, + "args": { + "External id": 974943,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 13987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936856115.730, "dur": 31095.989, + "args": { + "External id": 974944,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936856133.939, "dur": 31068.888, + "args": { + "External id": 974945,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 13989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936856150.531, "dur": 18.184, + "args": { + "External id": 974946,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936856175.673, "dur": 30986.772, + "args": { + "External id": 974947,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 13991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936856178.643, "dur": 30983.095, + "args": { + "External id": 974948,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 13992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936856184.728, "dur": 7.116, + "args": { + "External id": 974949,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 13993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936856193.766, "dur": 30964.106, + "args": { + "External id": 974950,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 13994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936887420.545, "dur": 36.496, + "args": { + "External id": 974951,"Sequence number": 10552291, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 13995 + } + }, + { + "ph": "s", "id": 175, "pid": 2338708, "tid": 2338708, "ts": 6345936887420.545, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936887441.160, "dur": 10.005, + "args": { + "External id": 974952,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 13996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936887445.765, "dur": 5.180, + "args": { + "External id": 974953,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 13997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936887530.590, "dur": 82.187, + "args": { + "External id": 974954,"Record function id": 0, "Ev Idx": 13998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936887614.638, "dur": 1272.855, + "args": { + "External id": 974955,"Record function id": 0, "Ev Idx": 13999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936887657.580, "dur": 1213.136, + "args": { + "External id": 974956,"Sequence number": 10552292, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 14000 + } + }, + { + "ph": "s", "id": 174, "pid": 2338708, "tid": 2338708, "ts": 6345936887657.580, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936887733.485, "dur": 55.101, + "args": { + "External id": 974957,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936887803.305, "dur": 118.020, + "args": { + "External id": 974958,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936887934.975, "dur": 40.808, + "args": { + "External id": 974959,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936887985.610, "dur": 54.785, + "args": { + "External id": 974960,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936888115.171, "dur": 38.599, + "args": { + "External id": 974961,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936888180.705, "dur": 21.076, + "args": { + "External id": 974962,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936888227.170, "dur": 154.092, + "args": { + "External id": 974963,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936888287.792, "dur": 14.855, + "args": { + "External id": 974964,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936888294.851, "dur": 6.781, + "args": { + "External id": 974965,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936888305.845, "dur": 4.540, + "args": { + "External id": 974966,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936888311.823, "dur": 1.424, + "args": { + "External id": 974967,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936888315.998, "dur": 6.156, + "args": { + "External id": 974968,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936888394.945, "dur": 62.375, + "args": { + "External id": 974969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936888493.100, "dur": 32.529, + "args": { + "External id": 974970,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936888536.934, "dur": 46.719, + "args": { + "External id": 974971,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936888595.500, "dur": 39.480, + "args": { + "External id": 974972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936888670.656, "dur": 29.600, + "args": { + "External id": 974973,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 14017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936888708.991, "dur": 40.363, + "args": { + "External id": 974974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 14018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936888771.271, "dur": 19.233, + "args": { + "External id": 974975,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 14019 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2338708, "tid": 2338708, + "ts": 6345936888964.017, "dur": 158.052, + "args": { + "External id": 974976,"Record function id": 0, "Ev Idx": 14020 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345936889222.411, "dur": 57.921, + "args": { + "External id": 974977,"Record function id": 0, "Ev Idx": 14021 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2338708, "tid": 2338708, + "ts": 6345936889291.545, "dur": 31459.626, + "args": { + "External id": 974978,"Record function id": 0, "Ev Idx": 14022 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2338708, "tid": 2338708, + "ts": 6345936889300.758, "dur": 1010.396, + "args": { + "External id": 974979,"Record function id": 0, "Ev Idx": 14023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936889398.784, "dur": 11.437, + "args": { + "External id": 974980,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936889425.494, "dur": 40.825, + "args": { + "External id": 974981,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 14025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889431.921, "dur": 2.750, + "args": { + "External id": 974982,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889439.363, "dur": 0.425, + "args": { + "External id": 974983,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889441.346, "dur": 0.763, + "args": { + "External id": 974984,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889443.600, "dur": 0.662, + "args": { + "External id": 974985,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889447.453, "dur": 0.506, + "args": { + "External id": 974986,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889449.532, "dur": 0.420, + "args": { + "External id": 974987,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889451.730, "dur": 4.396, + "args": { + "External id": 974988,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889457.697, "dur": 0.300, + "args": { + "External id": 974989,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889459.119, "dur": 0.343, + "args": { + "External id": 974990,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936889479.421, "dur": 64.888, + "args": { + "External id": 974991,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 14035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345936889585.561, "dur": 139.681, + "args": { + "External id": 974992,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 14036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936889598.265, "dur": 4.982, + "args": { + "External id": 974993,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345936889609.411, "dur": 11.906, + "args": { + "External id": 974994,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 14038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936889614.673, "dur": 6.173, + "args": { + "External id": 974995,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 14039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889618.733, "dur": 0.677, + "args": { + "External id": 974996,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 14040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345936889629.291, "dur": 31.076, + "args": { + "External id": 974997,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 14041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889631.406, "dur": 2.331, + "args": { + "External id": 974998,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889635.084, "dur": 0.494, + "args": { + "External id": 974999,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889637.404, "dur": 0.450, + "args": { + "External id": 975000,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889640.799, "dur": 2.992, + "args": { + "External id": 975001,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889645.427, "dur": 0.393, + "args": { + "External id": 975002,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889647.497, "dur": 0.344, + "args": { + "External id": 975003,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889650.629, "dur": 0.326, + "args": { + "External id": 975004,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889652.130, "dur": 0.297, + "args": { + "External id": 975005,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936889653.719, "dur": 2.262, + "args": { + "External id": 975006,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 14050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936889675.333, "dur": 40.700, + "args": { + "External id": 975007,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 14051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345936889790.141, "dur": 403.554, + "args": { + "External id": 975008,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 14052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936889825.229, "dur": 361.852, + "args": { + "External id": 975009,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 14053, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345936889836.466, "dur": 343.663, + "args": { + "External id": 975010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 14054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345936890223.737, "dur": 3.000, + "args": { + "External id": 975011,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 14055, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2338708, "tid": 2338708, + "ts": 6345936890336.817, "dur": 30189.541, + "args": { + "External id": 975012,"Record function id": 0, "Ev Idx": 14056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936890454.461, "dur": 7.762, + "args": { + "External id": 975013,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 14057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936890466.773, "dur": 1.192, + "args": { + "External id": 975014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936890469.996, "dur": 3.442, + "args": { + "External id": 975015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 14059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936890475.457, "dur": 0.777, + "args": { + "External id": 975016,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 14060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936890477.701, "dur": 0.881, + "args": { + "External id": 975017,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 14061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936890479.941, "dur": 0.949, + "args": { + "External id": 975018,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 14062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936890485.426, "dur": 0.754, + "args": { + "External id": 975019,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936890487.875, "dur": 2.321, + "args": { + "External id": 975020,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936890491.939, "dur": 0.722, + "args": { + "External id": 975021,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936890494.260, "dur": 0.727, + "args": { + "External id": 975022,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 14066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936890517.546, "dur": 29960.715, + "args": { + "External id": 975023,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 14067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936890535.672, "dur": 29933.287, + "args": { + "External id": 975024,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 14068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936890560.142, "dur": 19.114, + "args": { + "External id": 975025,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936890583.310, "dur": 29845.643, + "args": { + "External id": 975026,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 14070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936890586.276, "dur": 29841.693, + "args": { + "External id": 975027,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 14071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936890593.535, "dur": 6.260, + "args": { + "External id": 975028,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936890601.946, "dur": 29822.507, + "args": { + "External id": 975029,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 14073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936920685.656, "dur": 35.319, + "args": { + "External id": 975030,"Sequence number": 10552293, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 14074 + } + }, + { + "ph": "s", "id": 173, "pid": 2338708, "tid": 2338708, "ts": 6345936920685.656, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936920705.042, "dur": 10.101, + "args": { + "External id": 975031,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 14075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936920709.331, "dur": 5.532, + "args": { + "External id": 975032,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936920795.501, "dur": 78.690, + "args": { + "External id": 975033,"Record function id": 0, "Ev Idx": 14077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345936920875.742, "dur": 1301.912, + "args": { + "External id": 975034,"Record function id": 0, "Ev Idx": 14078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936920921.049, "dur": 1239.999, + "args": { + "External id": 975035,"Sequence number": 10552294, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 14079 + } + }, + { + "ph": "s", "id": 172, "pid": 2338708, "tid": 2338708, "ts": 6345936920921.049, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936920993.364, "dur": 105.355, + "args": { + "External id": 975036,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936921118.296, "dur": 115.885, + "args": { + "External id": 975037,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936921247.255, "dur": 40.725, + "args": { + "External id": 975038,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936921297.683, "dur": 33.720, + "args": { + "External id": 975039,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 14083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936921363.170, "dur": 30.693, + "args": { + "External id": 975040,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345936921418.677, "dur": 23.489, + "args": { + "External id": 975041,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 14085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936921469.144, "dur": 154.127, + "args": { + "External id": 975042,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936921529.977, "dur": 14.366, + "args": { + "External id": 975043,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936921536.797, "dur": 6.527, + "args": { + "External id": 975044,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936921547.754, "dur": 4.627, + "args": { + "External id": 975045,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936921554.203, "dur": 1.513, + "args": { + "External id": 975046,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936921558.773, "dur": 5.045, + "args": { + "External id": 975047,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936921636.282, "dur": 54.742, + "args": { + "External id": 975048,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 14092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345936921726.160, "dur": 35.828, + "args": { + "External id": 975049,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 14093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936921772.971, "dur": 47.778, + "args": { + "External id": 975050,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936921832.179, "dur": 39.426, + "args": { + "External id": 975051,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 14095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936921897.598, "dur": 29.407, + "args": { + "External id": 975052,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 14096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936921935.628, "dur": 40.227, + "args": { + "External id": 975053,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 14097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345936921996.474, "dur": 37.202, + "args": { + "External id": 975054,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 14098 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2338708, "tid": 2338708, + "ts": 6345936922258.519, "dur": 38.215, + "args": { + "External id": 975055,"Record function id": 0, "Ev Idx": 14099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936922450.122, "dur": 328.244, + "args": { + "External id": 975056,"Sequence number": 10552295, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14100 + } + }, + { + "ph": "s", "id": 171, "pid": 2338708, "tid": 2338708, "ts": 6345936922450.122, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936922485.684, "dur": 10.414, + "args": { + "External id": 975057,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936922488.576, "dur": 7.231, + "args": { + "External id": 975058,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936922507.325, "dur": 16.450, + "args": { + "External id": 975059,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936922511.766, "dur": 11.248, + "args": { + "External id": 975060,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936922535.719, "dur": 4.830, + "args": { + "External id": 975061,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936922756.390, "dur": 6.944, + "args": { + "External id": 975062,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936922760.049, "dur": 3.044, + "args": { + "External id": 975063,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936922810.959, "dur": 166.241, + "args": { + "External id": 975064,"Sequence number": 10552296, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936922813.659, "dur": 17.420, + "args": { + "External id": 975065,"Sequence number": 10552296, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14109 + } + }, + { + "ph": "s", "id": 170, "pid": 2338708, "tid": 2338708, "ts": 6345936922813.659, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936922819.688, "dur": 9.262, + "args": { + "External id": 975066,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936922825.762, "dur": 2.738, + "args": { + "External id": 975067,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936922833.931, "dur": 142.891, + "args": { + "External id": 975068,"Sequence number": 10552297, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936922837.086, "dur": 5.608, + "args": { + "External id": 975069,"Sequence number": 10552297, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936922838.242, "dur": 4.300, + "args": { + "External id": 975070,"Sequence number": 10552297, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14114 + } + }, + { + "ph": "s", "id": 169, "pid": 2338708, "tid": 2338708, "ts": 6345936922838.242, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936922844.681, "dur": 113.174, + "args": { + "External id": 975071,"Sequence number": 10552298, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14115 + } + }, + { + "ph": "s", "id": 168, "pid": 2338708, "tid": 2338708, "ts": 6345936922844.681, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936922965.907, "dur": 9.630, + "args": { + "External id": 975072,"Sequence number": 10552299, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14116 + } + }, + { + "ph": "s", "id": 167, "pid": 2338708, "tid": 2338708, "ts": 6345936922965.907, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936922988.349, "dur": 158.172, + "args": { + "External id": 975073,"Sequence number": 10552300, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936922989.376, "dur": 8.978, + "args": { + "External id": 975074,"Sequence number": 10552300, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14118 + } + }, + { + "ph": "s", "id": 166, "pid": 2338708, "tid": 2338708, "ts": 6345936922989.376, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936922991.495, "dur": 5.354, + "args": { + "External id": 975075,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936922995.738, "dur": 0.870, + "args": { + "External id": 975076,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936922998.947, "dur": 147.221, + "args": { + "External id": 975077,"Sequence number": 10552301, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936923000.679, "dur": 5.436, + "args": { + "External id": 975078,"Sequence number": 10552301, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936923001.713, "dur": 4.252, + "args": { + "External id": 975079,"Sequence number": 10552301, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14123 + } + }, + { + "ph": "s", "id": 165, "pid": 2338708, "tid": 2338708, "ts": 6345936923001.713, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936923007.076, "dur": 128.778, + "args": { + "External id": 975080,"Sequence number": 10552302, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14124 + } + }, + { + "ph": "s", "id": 164, "pid": 2338708, "tid": 2338708, "ts": 6345936923007.076, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936923139.812, "dur": 5.646, + "args": { + "External id": 975081,"Sequence number": 10552303, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14125 + } + }, + { + "ph": "s", "id": 163, "pid": 2338708, "tid": 2338708, "ts": 6345936923139.812, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936923159.158, "dur": 76.857, + "args": { + "External id": 975082,"Sequence number": 10552304, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936923160.007, "dur": 7.629, + "args": { + "External id": 975083,"Sequence number": 10552304, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14127 + } + }, + { + "ph": "s", "id": 162, "pid": 2338708, "tid": 2338708, "ts": 6345936923160.007, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936923162.348, "dur": 3.548, + "args": { + "External id": 975084,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936923164.714, "dur": 0.881, + "args": { + "External id": 975085,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936923171.030, "dur": 64.698, + "args": { + "External id": 975086,"Sequence number": 10552305, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936923172.158, "dur": 5.413, + "args": { + "External id": 975087,"Sequence number": 10552305, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936923173.104, "dur": 4.265, + "args": { + "External id": 975088,"Sequence number": 10552305, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14132 + } + }, + { + "ph": "s", "id": 161, "pid": 2338708, "tid": 2338708, "ts": 6345936923173.104, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936923178.105, "dur": 49.196, + "args": { + "External id": 975089,"Sequence number": 10552306, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14133 + } + }, + { + "ph": "s", "id": 160, "pid": 2338708, "tid": 2338708, "ts": 6345936923178.105, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936923229.698, "dur": 5.588, + "args": { + "External id": 975090,"Sequence number": 10552307, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14134 + } + }, + { + "ph": "s", "id": 159, "pid": 2338708, "tid": 2338708, "ts": 6345936923229.698, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936923262.889, "dur": 4.696, + "args": { + "External id": 975091,"Sequence number": 10552308, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936923264.256, "dur": 3.143, + "args": { + "External id": 975092,"Sequence number": 10552308, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14136 + } + }, + { + "ph": "s", "id": 158, "pid": 2338708, "tid": 2338708, "ts": 6345936923264.256, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936923277.729, "dur": 3.366, + "args": { + "External id": 975093,"Sequence number": 10552309, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936923278.768, "dur": 2.174, + "args": { + "External id": 975094,"Sequence number": 10552309, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14138 + } + }, + { + "ph": "s", "id": 157, "pid": 2338708, "tid": 2338708, "ts": 6345936923278.768, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936923288.077, "dur": 5.622, + "args": { + "External id": 975095,"Sequence number": 10552310, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936923289.695, "dur": 3.830, + "args": { + "External id": 975096,"Sequence number": 10552310, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14140 + } + }, + { + "ph": "s", "id": 156, "pid": 2338708, "tid": 2338708, "ts": 6345936923289.695, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936923338.529, "dur": 219.226, + "args": { + "External id": 975097,"Sequence number": 10552311, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14141 + } + }, + { + "ph": "s", "id": 155, "pid": 2338708, "tid": 2338708, "ts": 6345936923338.529, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936923367.651, "dur": 11.466, + "args": { + "External id": 975098,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936923372.110, "dur": 6.373, + "args": { + "External id": 975099,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936923573.811, "dur": 133.124, + "args": { + "External id": 975100,"Sequence number": 10552312, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14144 + } + }, + { + "ph": "s", "id": 154, "pid": 2338708, "tid": 2338708, "ts": 6345936923573.811, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936923590.336, "dur": 7.847, + "args": { + "External id": 975101,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936923593.326, "dur": 4.468, + "args": { + "External id": 975102,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338708, "tid": 2338708, + "ts": 6345936923743.055, "dur": 223.622, + "args": { + "External id": 975103,"Sequence number": 10552313, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14147 + } + }, + { + "ph": "s", "id": 153, "pid": 2338708, "tid": 2338708, "ts": 6345936923743.055, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936923775.513, "dur": 157.398, + "args": { + "External id": 975104,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936923840.464, "dur": 8.850, + "args": { + "External id": 975105,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936923843.796, "dur": 4.949, + "args": { + "External id": 975106,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936923856.796, "dur": 4.450, + "args": { + "External id": 975107,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936923865.435, "dur": 1.183, + "args": { + "External id": 975108,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936923869.306, "dur": 3.691, + "args": { + "External id": 975109,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2338708, + "ts": 6345936923950.355, "dur": 5.810, + "args": { + "External id": 975110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936923972.793, "dur": 6.238, + "args": { + "External id": 975111,"Sequence number": 10552314, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936923974.622, "dur": 4.163, + "args": { + "External id": 975112,"Sequence number": 10552314, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14156 + } + }, + { + "ph": "s", "id": 152, "pid": 2338708, "tid": 2338708, "ts": 6345936923974.622, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936923995.231, "dur": 201.677, + "args": { + "External id": 975113,"Sequence number": 10552315, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936923996.986, "dur": 8.974, + "args": { + "External id": 975114,"Sequence number": 10552315, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14158 + } + }, + { + "ph": "s", "id": 151, "pid": 2338708, "tid": 2338708, "ts": 6345936923996.986, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936923999.842, "dur": 4.710, + "args": { + "External id": 975115,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936924002.551, "dur": 1.684, + "args": { + "External id": 975116,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936924028.377, "dur": 168.201, + "args": { + "External id": 975117,"Sequence number": 10552316, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936924034.250, "dur": 5.674, + "args": { + "External id": 975118,"Sequence number": 10552316, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936924035.379, "dur": 4.234, + "args": { + "External id": 975119,"Sequence number": 10552316, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14163 + } + }, + { + "ph": "s", "id": 150, "pid": 2338708, "tid": 2338708, "ts": 6345936924035.379, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936924041.117, "dur": 145.213, + "args": { + "External id": 975120,"Sequence number": 10552317, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14164 + } + }, + { + "ph": "s", "id": 149, "pid": 2338708, "tid": 2338708, "ts": 6345936924041.117, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936924191.319, "dur": 4.254, + "args": { + "External id": 975121,"Sequence number": 10552318, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14165 + } + }, + { + "ph": "s", "id": 148, "pid": 2338708, "tid": 2338708, "ts": 6345936924191.319, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936924244.854, "dur": 272.000, + "args": { + "External id": 975122,"Sequence number": 10552319, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14166 + } + }, + { + "ph": "s", "id": 147, "pid": 2338708, "tid": 2338708, "ts": 6345936924244.854, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936924272.159, "dur": 3.733, + "args": { + "External id": 975123,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936924273.650, "dur": 2.076, + "args": { + "External id": 975124,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936924281.250, "dur": 8.119, + "args": { + "External id": 975125,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936924282.581, "dur": 6.640, + "args": { + "External id": 975126,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936924285.738, "dur": 3.357, + "args": { + "External id": 975127,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936924299.410, "dur": 9.252, + "args": { + "External id": 975128,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936924302.373, "dur": 5.912, + "args": { + "External id": 975129,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936924316.119, "dur": 3.322, + "args": { + "External id": 975130,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936924325.779, "dur": 3.401, + "args": { + "External id": 975131,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936924488.880, "dur": 3.927, + "args": { + "External id": 975132,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936924490.425, "dur": 2.189, + "args": { + "External id": 975133,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936924498.045, "dur": 2.228, + "args": { + "External id": 975134,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936924498.892, "dur": 1.274, + "args": { + "External id": 975135,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936924540.428, "dur": 114.425, + "args": { + "External id": 975136,"Sequence number": 10552320, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936924541.861, "dur": 9.909, + "args": { + "External id": 975137,"Sequence number": 10552320, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14181 + } + }, + { + "ph": "s", "id": 146, "pid": 2338708, "tid": 2338708, "ts": 6345936924541.861, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936924545.224, "dur": 5.035, + "args": { + "External id": 975138,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936924547.676, "dur": 2.139, + "args": { + "External id": 975139,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936924555.213, "dur": 99.204, + "args": { + "External id": 975140,"Sequence number": 10552321, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936924557.021, "dur": 3.916, + "args": { + "External id": 975141,"Sequence number": 10552321, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936924558.068, "dur": 2.723, + "args": { + "External id": 975142,"Sequence number": 10552321, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14186 + } + }, + { + "ph": "s", "id": 145, "pid": 2338708, "tid": 2338708, "ts": 6345936924558.068, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936924561.919, "dur": 79.381, + "args": { + "External id": 975143,"Sequence number": 10552322, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14187 + } + }, + { + "ph": "s", "id": 144, "pid": 2338708, "tid": 2338708, "ts": 6345936924561.919, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936924643.945, "dur": 9.711, + "args": { + "External id": 975144,"Sequence number": 10552323, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14188 + } + }, + { + "ph": "s", "id": 143, "pid": 2338708, "tid": 2338708, "ts": 6345936924643.945, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936924664.631, "dur": 75.414, + "args": { + "External id": 975145,"Sequence number": 10552324, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936924665.624, "dur": 6.450, + "args": { + "External id": 975146,"Sequence number": 10552324, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14190 + } + }, + { + "ph": "s", "id": 142, "pid": 2338708, "tid": 2338708, "ts": 6345936924665.624, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936924667.209, "dur": 3.545, + "args": { + "External id": 975147,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936924669.231, "dur": 1.192, + "args": { + "External id": 975148,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936924672.680, "dur": 67.116, + "args": { + "External id": 975149,"Sequence number": 10552325, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936924676.247, "dur": 5.344, + "args": { + "External id": 975150,"Sequence number": 10552325, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936924677.305, "dur": 4.135, + "args": { + "External id": 975151,"Sequence number": 10552325, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14195 + } + }, + { + "ph": "s", "id": 141, "pid": 2338708, "tid": 2338708, "ts": 6345936924677.305, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936924682.318, "dur": 53.065, + "args": { + "External id": 975152,"Sequence number": 10552326, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14196 + } + }, + { + "ph": "s", "id": 140, "pid": 2338708, "tid": 2338708, "ts": 6345936924682.318, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936924737.425, "dur": 1.975, + "args": { + "External id": 975153,"Sequence number": 10552327, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14197 + } + }, + { + "ph": "s", "id": 139, "pid": 2338708, "tid": 2338708, "ts": 6345936924737.425, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936924768.577, "dur": 184.620, + "args": { + "External id": 975154,"Sequence number": 10552328, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14198 + } + }, + { + "ph": "s", "id": 138, "pid": 2338708, "tid": 2338708, "ts": 6345936924768.577, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936924818.035, "dur": 4.796, + "args": { + "External id": 975155,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936924862.319, "dur": 75.769, + "args": { + "External id": 975156,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936924863.365, "dur": 7.997, + "args": { + "External id": 975157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936924864.544, "dur": 5.626, + "args": { + "External id": 975158,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936924868.914, "dur": 1.051, + "args": { + "External id": 975159,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936924874.846, "dur": 62.766, + "args": { + "External id": 975160,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936924876.614, "dur": 3.094, + "args": { + "External id": 975161,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936924878.155, "dur": 1.388, + "args": { + "External id": 975162,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936924880.318, "dur": 53.050, + "args": { + "External id": 975163,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936924935.748, "dur": 1.171, + "args": { + "External id": 975164,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345936924965.010, "dur": 29.987, + "args": { + "External id": 975165,"Sequence number": 10552329, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14209 + } + }, + { + "ph": "s", "id": 137, "pid": 2338708, "tid": 2338708, "ts": 6345936924965.010, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936925096.890, "dur": 233.212, + "args": { + "External id": 975166,"Sequence number": 10552330, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14210 + } + }, + { + "ph": "s", "id": 136, "pid": 2338708, "tid": 2338708, "ts": 6345936925096.890, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936925125.656, "dur": 5.413, + "args": { + "External id": 975167,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936925126.771, "dur": 3.957, + "args": { + "External id": 975168,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936925140.671, "dur": 9.859, + "args": { + "External id": 975169,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936925144.146, "dur": 5.949, + "args": { + "External id": 975170,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936925157.951, "dur": 4.214, + "args": { + "External id": 975171,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936925308.912, "dur": 3.563, + "args": { + "External id": 975172,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936925309.966, "dur": 2.383, + "args": { + "External id": 975173,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936925352.362, "dur": 109.304, + "args": { + "External id": 975174,"Sequence number": 10552331, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936925353.747, "dur": 8.712, + "args": { + "External id": 975175,"Sequence number": 10552331, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14219 + } + }, + { + "ph": "s", "id": 135, "pid": 2338708, "tid": 2338708, "ts": 6345936925353.747, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936925356.664, "dur": 4.226, + "args": { + "External id": 975176,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936925359.025, "dur": 1.618, + "args": { + "External id": 975177,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936925363.928, "dur": 97.332, + "args": { + "External id": 975178,"Sequence number": 10552332, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936925368.136, "dur": 6.489, + "args": { + "External id": 975179,"Sequence number": 10552332, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936925368.919, "dur": 5.517, + "args": { + "External id": 975180,"Sequence number": 10552332, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14224 + } + }, + { + "ph": "s", "id": 134, "pid": 2338708, "tid": 2338708, "ts": 6345936925368.919, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936925375.595, "dur": 76.741, + "args": { + "External id": 975181,"Sequence number": 10552333, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14225 + } + }, + { + "ph": "s", "id": 133, "pid": 2338708, "tid": 2338708, "ts": 6345936925375.595, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936925455.042, "dur": 5.302, + "args": { + "External id": 975182,"Sequence number": 10552334, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14226 + } + }, + { + "ph": "s", "id": 132, "pid": 2338708, "tid": 2338708, "ts": 6345936925455.042, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936925470.788, "dur": 78.248, + "args": { + "External id": 975183,"Sequence number": 10552335, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936925474.771, "dur": 9.318, + "args": { + "External id": 975184,"Sequence number": 10552335, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14228 + } + }, + { + "ph": "s", "id": 131, "pid": 2338708, "tid": 2338708, "ts": 6345936925474.771, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936925479.257, "dur": 3.611, + "args": { + "External id": 975185,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936925481.621, "dur": 0.788, + "args": { + "External id": 975186,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936925485.449, "dur": 63.221, + "args": { + "External id": 975187,"Sequence number": 10552336, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936925486.643, "dur": 6.048, + "args": { + "External id": 975188,"Sequence number": 10552336, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936925490.009, "dur": 2.504, + "args": { + "External id": 975189,"Sequence number": 10552336, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14233 + } + }, + { + "ph": "s", "id": 130, "pid": 2338708, "tid": 2338708, "ts": 6345936925490.009, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936925493.457, "dur": 47.323, + "args": { + "External id": 975190,"Sequence number": 10552337, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14234 + } + }, + { + "ph": "s", "id": 129, "pid": 2338708, "tid": 2338708, "ts": 6345936925493.457, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936925542.865, "dur": 4.811, + "args": { + "External id": 975191,"Sequence number": 10552338, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14235 + } + }, + { + "ph": "s", "id": 128, "pid": 2338708, "tid": 2338708, "ts": 6345936925542.865, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936925556.449, "dur": 64.249, + "args": { + "External id": 975192,"Sequence number": 10552339, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936925557.144, "dur": 7.680, + "args": { + "External id": 975193,"Sequence number": 10552339, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14237 + } + }, + { + "ph": "s", "id": 127, "pid": 2338708, "tid": 2338708, "ts": 6345936925557.144, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936925561.351, "dur": 2.272, + "args": { + "External id": 975194,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936925562.964, "dur": 0.480, + "args": { + "External id": 975195,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936925565.635, "dur": 54.849, + "args": { + "External id": 975196,"Sequence number": 10552340, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936925566.556, "dur": 8.552, + "args": { + "External id": 975197,"Sequence number": 10552340, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936925567.682, "dur": 7.288, + "args": { + "External id": 975198,"Sequence number": 10552340, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14242 + } + }, + { + "ph": "s", "id": 126, "pid": 2338708, "tid": 2338708, "ts": 6345936925567.682, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936925575.909, "dur": 40.059, + "args": { + "External id": 975199,"Sequence number": 10552341, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14243 + } + }, + { + "ph": "s", "id": 125, "pid": 2338708, "tid": 2338708, "ts": 6345936925575.909, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936925617.856, "dur": 2.194, + "args": { + "External id": 975200,"Sequence number": 10552342, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14244 + } + }, + { + "ph": "s", "id": 124, "pid": 2338708, "tid": 2338708, "ts": 6345936925617.856, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936925638.310, "dur": 4.684, + "args": { + "External id": 975201,"Sequence number": 10552343, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936925639.550, "dur": 3.275, + "args": { + "External id": 975202,"Sequence number": 10552343, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14246 + } + }, + { + "ph": "s", "id": 123, "pid": 2338708, "tid": 2338708, "ts": 6345936925639.550, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936925653.153, "dur": 5.221, + "args": { + "External id": 975203,"Sequence number": 10552344, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936925654.191, "dur": 3.998, + "args": { + "External id": 975204,"Sequence number": 10552344, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14248 + } + }, + { + "ph": "s", "id": 122, "pid": 2338708, "tid": 2338708, "ts": 6345936925654.191, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936925663.494, "dur": 3.309, + "args": { + "External id": 975205,"Sequence number": 10552345, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936925664.636, "dur": 2.022, + "args": { + "External id": 975206,"Sequence number": 10552345, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14250 + } + }, + { + "ph": "s", "id": 121, "pid": 2338708, "tid": 2338708, "ts": 6345936925664.636, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936925697.620, "dur": 167.357, + "args": { + "External id": 975207,"Sequence number": 10552346, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14251 + } + }, + { + "ph": "s", "id": 120, "pid": 2338708, "tid": 2338708, "ts": 6345936925697.620, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936925718.427, "dur": 8.198, + "args": { + "External id": 975208,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936925721.588, "dur": 4.688, + "args": { + "External id": 975209,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936925879.141, "dur": 118.904, + "args": { + "External id": 975210,"Sequence number": 10552347, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14254 + } + }, + { + "ph": "s", "id": 119, "pid": 2338708, "tid": 2338708, "ts": 6345936925879.141, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936925894.988, "dur": 7.094, + "args": { + "External id": 975211,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936925897.340, "dur": 4.409, + "args": { + "External id": 975212,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338708, "tid": 2338708, + "ts": 6345936926093.978, "dur": 226.876, + "args": { + "External id": 975213,"Sequence number": 10552348, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14257 + } + }, + { + "ph": "s", "id": 118, "pid": 2338708, "tid": 2338708, "ts": 6345936926093.978, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936926134.598, "dur": 151.031, + "args": { + "External id": 975214,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936926192.710, "dur": 13.251, + "args": { + "External id": 975215,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936926195.985, "dur": 9.183, + "args": { + "External id": 975216,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936926209.457, "dur": 4.466, + "args": { + "External id": 975217,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936926215.494, "dur": 1.335, + "args": { + "External id": 975218,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936926219.579, "dur": 3.883, + "args": { + "External id": 975219,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2338708, + "ts": 6345936926299.880, "dur": 5.801, + "args": { + "External id": 975220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936926327.241, "dur": 7.059, + "args": { + "External id": 975221,"Sequence number": 10552349, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936926328.918, "dur": 5.179, + "args": { + "External id": 975222,"Sequence number": 10552349, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14266 + } + }, + { + "ph": "s", "id": 117, "pid": 2338708, "tid": 2338708, "ts": 6345936926328.918, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936926349.998, "dur": 132.890, + "args": { + "External id": 975223,"Sequence number": 10552350, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936926351.683, "dur": 9.577, + "args": { + "External id": 975224,"Sequence number": 10552350, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14268 + } + }, + { + "ph": "s", "id": 116, "pid": 2338708, "tid": 2338708, "ts": 6345936926351.683, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936926354.688, "dur": 5.308, + "args": { + "External id": 975225,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936926357.707, "dur": 1.965, + "args": { + "External id": 975226,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936926365.225, "dur": 117.178, + "args": { + "External id": 975227,"Sequence number": 10552351, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936926367.841, "dur": 3.287, + "args": { + "External id": 975228,"Sequence number": 10552351, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936926368.445, "dur": 2.538, + "args": { + "External id": 975229,"Sequence number": 10552351, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14273 + } + }, + { + "ph": "s", "id": 115, "pid": 2338708, "tid": 2338708, "ts": 6345936926368.445, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936926372.359, "dur": 97.981, + "args": { + "External id": 975230,"Sequence number": 10552352, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14274 + } + }, + { + "ph": "s", "id": 114, "pid": 2338708, "tid": 2338708, "ts": 6345936926372.359, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936926473.115, "dur": 8.340, + "args": { + "External id": 975231,"Sequence number": 10552353, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14275 + } + }, + { + "ph": "s", "id": 113, "pid": 2338708, "tid": 2338708, "ts": 6345936926473.115, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936926533.612, "dur": 253.313, + "args": { + "External id": 975232,"Sequence number": 10552354, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14276 + } + }, + { + "ph": "s", "id": 112, "pid": 2338708, "tid": 2338708, "ts": 6345936926533.612, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936926556.255, "dur": 4.698, + "args": { + "External id": 975233,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936926557.457, "dur": 3.246, + "args": { + "External id": 975234,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936926565.825, "dur": 6.135, + "args": { + "External id": 975235,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936926570.099, "dur": 1.696, + "args": { + "External id": 975236,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936926570.766, "dur": 0.924, + "args": { + "External id": 975237,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936926580.912, "dur": 7.134, + "args": { + "External id": 975238,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936926582.872, "dur": 4.676, + "args": { + "External id": 975239,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936926594.851, "dur": 3.176, + "args": { + "External id": 975240,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936926602.131, "dur": 4.159, + "args": { + "External id": 975241,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936926765.359, "dur": 3.748, + "args": { + "External id": 975242,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936926766.288, "dur": 2.575, + "args": { + "External id": 975243,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936926771.831, "dur": 2.202, + "args": { + "External id": 975244,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936926772.621, "dur": 1.289, + "args": { + "External id": 975245,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936926808.830, "dur": 112.141, + "args": { + "External id": 975246,"Sequence number": 10552355, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936926810.347, "dur": 11.184, + "args": { + "External id": 975247,"Sequence number": 10552355, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14291 + } + }, + { + "ph": "s", "id": 111, "pid": 2338708, "tid": 2338708, "ts": 6345936926810.347, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936926813.257, "dur": 6.977, + "args": { + "External id": 975248,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936926818.387, "dur": 1.574, + "args": { + "External id": 975249,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936926822.647, "dur": 98.033, + "args": { + "External id": 975250,"Sequence number": 10552356, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936926824.266, "dur": 6.428, + "args": { + "External id": 975251,"Sequence number": 10552356, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936926825.197, "dur": 5.364, + "args": { + "External id": 975252,"Sequence number": 10552356, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14296 + } + }, + { + "ph": "s", "id": 110, "pid": 2338708, "tid": 2338708, "ts": 6345936926825.197, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936926833.897, "dur": 80.174, + "args": { + "External id": 975253,"Sequence number": 10552357, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14297 + } + }, + { + "ph": "s", "id": 109, "pid": 2338708, "tid": 2338708, "ts": 6345936926833.897, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936926917.096, "dur": 2.885, + "args": { + "External id": 975254,"Sequence number": 10552358, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14298 + } + }, + { + "ph": "s", "id": 108, "pid": 2338708, "tid": 2338708, "ts": 6345936926917.096, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936926930.624, "dur": 118.977, + "args": { + "External id": 975255,"Sequence number": 10552359, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936926931.550, "dur": 18.378, + "args": { + "External id": 975256,"Sequence number": 10552359, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14300 + } + }, + { + "ph": "s", "id": 107, "pid": 2338708, "tid": 2338708, "ts": 6345936926931.550, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936926940.344, "dur": 8.181, + "args": { + "External id": 975257,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936926947.322, "dur": 1.015, + "args": { + "External id": 975258,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936926950.740, "dur": 98.521, + "args": { + "External id": 975259,"Sequence number": 10552360, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936926951.917, "dur": 5.875, + "args": { + "External id": 975260,"Sequence number": 10552360, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936926952.767, "dur": 4.840, + "args": { + "External id": 975261,"Sequence number": 10552360, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14305 + } + }, + { + "ph": "s", "id": 106, "pid": 2338708, "tid": 2338708, "ts": 6345936926952.767, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936926958.878, "dur": 78.707, + "args": { + "External id": 975262,"Sequence number": 10552361, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14306 + } + }, + { + "ph": "s", "id": 105, "pid": 2338708, "tid": 2338708, "ts": 6345936926958.878, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927041.123, "dur": 7.372, + "args": { + "External id": 975263,"Sequence number": 10552362, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14307 + } + }, + { + "ph": "s", "id": 104, "pid": 2338708, "tid": 2338708, "ts": 6345936927041.123, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936927120.470, "dur": 204.285, + "args": { + "External id": 975264,"Sequence number": 10552363, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14308 + } + }, + { + "ph": "s", "id": 103, "pid": 2338708, "tid": 2338708, "ts": 6345936927120.470, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936927170.697, "dur": 6.989, + "args": { + "External id": 975265,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936927225.750, "dur": 81.506, + "args": { + "External id": 975266,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936927226.948, "dur": 7.304, + "args": { + "External id": 975267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936927228.872, "dur": 4.069, + "args": { + "External id": 975268,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936927230.969, "dur": 1.706, + "args": { + "External id": 975269,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936927235.642, "dur": 71.133, + "args": { + "External id": 975270,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936927237.706, "dur": 3.001, + "args": { + "External id": 975271,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927238.681, "dur": 1.916, + "args": { + "External id": 975272,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936927241.308, "dur": 60.707, + "args": { + "External id": 975273,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927304.657, "dur": 1.127, + "args": { + "External id": 975274,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345936927336.202, "dur": 29.246, + "args": { + "External id": 975275,"Sequence number": 10552364, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14319 + } + }, + { + "ph": "s", "id": 102, "pid": 2338708, "tid": 2338708, "ts": 6345936927336.202, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936927411.538, "dur": 229.280, + "args": { + "External id": 975276,"Sequence number": 10552365, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14320 + } + }, + { + "ph": "s", "id": 101, "pid": 2338708, "tid": 2338708, "ts": 6345936927411.538, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936927434.261, "dur": 3.851, + "args": { + "External id": 975277,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927435.412, "dur": 2.505, + "args": { + "External id": 975278,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936927448.113, "dur": 8.639, + "args": { + "External id": 975279,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936927451.158, "dur": 5.186, + "args": { + "External id": 975280,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936927464.079, "dur": 6.456, + "args": { + "External id": 975281,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936927624.664, "dur": 3.335, + "args": { + "External id": 975282,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927625.863, "dur": 1.961, + "args": { + "External id": 975283,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936927664.467, "dur": 99.450, + "args": { + "External id": 975284,"Sequence number": 10552366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936927665.712, "dur": 8.612, + "args": { + "External id": 975285,"Sequence number": 10552366, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14329 + } + }, + { + "ph": "s", "id": 100, "pid": 2338708, "tid": 2338708, "ts": 6345936927665.712, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936927668.515, "dur": 4.141, + "args": { + "External id": 975286,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936927670.783, "dur": 1.641, + "args": { + "External id": 975287,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936927678.059, "dur": 85.546, + "args": { + "External id": 975288,"Sequence number": 10552367, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936927679.700, "dur": 4.444, + "args": { + "External id": 975289,"Sequence number": 10552367, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927680.479, "dur": 3.513, + "args": { + "External id": 975290,"Sequence number": 10552367, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14334 + } + }, + { + "ph": "s", "id": 99, "pid": 2338708, "tid": 2338708, "ts": 6345936927680.479, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936927685.160, "dur": 69.665, + "args": { + "External id": 975291,"Sequence number": 10552368, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14335 + } + }, + { + "ph": "s", "id": 98, "pid": 2338708, "tid": 2338708, "ts": 6345936927685.160, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927757.837, "dur": 4.920, + "args": { + "External id": 975292,"Sequence number": 10552369, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14336 + } + }, + { + "ph": "s", "id": 97, "pid": 2338708, "tid": 2338708, "ts": 6345936927757.837, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936927773.309, "dur": 68.902, + "args": { + "External id": 975293,"Sequence number": 10552370, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936927774.297, "dur": 6.262, + "args": { + "External id": 975294,"Sequence number": 10552370, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14338 + } + }, + { + "ph": "s", "id": 96, "pid": 2338708, "tid": 2338708, "ts": 6345936927774.297, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936927776.423, "dur": 2.897, + "args": { + "External id": 975295,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936927778.157, "dur": 0.979, + "args": { + "External id": 975296,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936927781.346, "dur": 60.577, + "args": { + "External id": 975297,"Sequence number": 10552371, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936927785.430, "dur": 3.656, + "args": { + "External id": 975298,"Sequence number": 10552371, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927786.310, "dur": 2.639, + "args": { + "External id": 975299,"Sequence number": 10552371, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14343 + } + }, + { + "ph": "s", "id": 95, "pid": 2338708, "tid": 2338708, "ts": 6345936927786.310, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936927789.597, "dur": 48.052, + "args": { + "External id": 975300,"Sequence number": 10552372, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14344 + } + }, + { + "ph": "s", "id": 94, "pid": 2338708, "tid": 2338708, "ts": 6345936927789.597, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927839.596, "dur": 1.970, + "args": { + "External id": 975301,"Sequence number": 10552373, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14345 + } + }, + { + "ph": "s", "id": 93, "pid": 2338708, "tid": 2338708, "ts": 6345936927839.596, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936927849.755, "dur": 68.494, + "args": { + "External id": 975302,"Sequence number": 10552374, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936927852.798, "dur": 7.149, + "args": { + "External id": 975303,"Sequence number": 10552374, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14347 + } + }, + { + "ph": "s", "id": 92, "pid": 2338708, "tid": 2338708, "ts": 6345936927852.798, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936927855.122, "dur": 3.352, + "args": { + "External id": 975304,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936927856.376, "dur": 1.811, + "args": { + "External id": 975305,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936927860.812, "dur": 57.127, + "args": { + "External id": 975306,"Sequence number": 10552375, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936927862.299, "dur": 7.306, + "args": { + "External id": 975307,"Sequence number": 10552375, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927865.800, "dur": 3.683, + "args": { + "External id": 975308,"Sequence number": 10552375, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14352 + } + }, + { + "ph": "s", "id": 91, "pid": 2338708, "tid": 2338708, "ts": 6345936927865.800, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936927870.138, "dur": 39.528, + "args": { + "External id": 975309,"Sequence number": 10552376, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14353 + } + }, + { + "ph": "s", "id": 90, "pid": 2338708, "tid": 2338708, "ts": 6345936927870.138, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927911.498, "dur": 6.012, + "args": { + "External id": 975310,"Sequence number": 10552377, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14354 + } + }, + { + "ph": "s", "id": 89, "pid": 2338708, "tid": 2338708, "ts": 6345936927911.498, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936927936.850, "dur": 7.094, + "args": { + "External id": 975311,"Sequence number": 10552378, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927940.824, "dur": 2.925, + "args": { + "External id": 975312,"Sequence number": 10552378, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14356 + } + }, + { + "ph": "s", "id": 88, "pid": 2338708, "tid": 2338708, "ts": 6345936927940.824, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936927952.070, "dur": 3.392, + "args": { + "External id": 975313,"Sequence number": 10552379, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927953.493, "dur": 1.828, + "args": { + "External id": 975314,"Sequence number": 10552379, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14358 + } + }, + { + "ph": "s", "id": 87, "pid": 2338708, "tid": 2338708, "ts": 6345936927953.493, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936927960.230, "dur": 5.294, + "args": { + "External id": 975315,"Sequence number": 10552380, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936927961.306, "dur": 4.073, + "args": { + "External id": 975316,"Sequence number": 10552380, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14360 + } + }, + { + "ph": "s", "id": 86, "pid": 2338708, "tid": 2338708, "ts": 6345936927961.306, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936927999.524, "dur": 262.334, + "args": { + "External id": 975317,"Sequence number": 10552381, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14361 + } + }, + { + "ph": "s", "id": 85, "pid": 2338708, "tid": 2338708, "ts": 6345936927999.524, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936928044.037, "dur": 52.795, + "args": { + "External id": 975318,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936928047.177, "dur": 48.420, + "args": { + "External id": 975319,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936928279.934, "dur": 133.096, + "args": { + "External id": 975320,"Sequence number": 10552382, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14364 + } + }, + { + "ph": "s", "id": 84, "pid": 2338708, "tid": 2338708, "ts": 6345936928279.934, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936928297.529, "dur": 8.589, + "args": { + "External id": 975321,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936928300.289, "dur": 5.346, + "args": { + "External id": 975322,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338708, "tid": 2338708, + "ts": 6345936928447.376, "dur": 206.188, + "args": { + "External id": 975323,"Sequence number": 10552383, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14367 + } + }, + { + "ph": "s", "id": 83, "pid": 2338708, "tid": 2338708, "ts": 6345936928447.376, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936928478.324, "dur": 145.242, + "args": { + "External id": 975324,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936928534.545, "dur": 7.904, + "args": { + "External id": 975325,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936928536.986, "dur": 4.783, + "args": { + "External id": 975326,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936928545.354, "dur": 6.828, + "args": { + "External id": 975327,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936928553.995, "dur": 1.500, + "args": { + "External id": 975328,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936928560.699, "dur": 3.543, + "args": { + "External id": 975329,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2338708, + "ts": 6345936928638.103, "dur": 5.735, + "args": { + "External id": 975330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936928659.637, "dur": 7.089, + "args": { + "External id": 975331,"Sequence number": 10552384, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936928661.454, "dur": 5.063, + "args": { + "External id": 975332,"Sequence number": 10552384, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14376 + } + }, + { + "ph": "s", "id": 82, "pid": 2338708, "tid": 2338708, "ts": 6345936928661.454, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936928681.000, "dur": 151.898, + "args": { + "External id": 975333,"Sequence number": 10552385, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936928683.005, "dur": 14.442, + "args": { + "External id": 975334,"Sequence number": 10552385, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14378 + } + }, + { + "ph": "s", "id": 81, "pid": 2338708, "tid": 2338708, "ts": 6345936928683.005, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936928689.076, "dur": 7.230, + "args": { + "External id": 975335,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936928694.026, "dur": 1.947, + "args": { + "External id": 975336,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936928698.943, "dur": 133.542, + "args": { + "External id": 975337,"Sequence number": 10552386, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936928701.286, "dur": 5.315, + "args": { + "External id": 975338,"Sequence number": 10552386, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936928702.254, "dur": 4.205, + "args": { + "External id": 975339,"Sequence number": 10552386, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14383 + } + }, + { + "ph": "s", "id": 80, "pid": 2338708, "tid": 2338708, "ts": 6345936928702.254, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936928710.516, "dur": 110.843, + "args": { + "External id": 975340,"Sequence number": 10552387, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14384 + } + }, + { + "ph": "s", "id": 79, "pid": 2338708, "tid": 2338708, "ts": 6345936928710.516, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936928823.962, "dur": 7.554, + "args": { + "External id": 975341,"Sequence number": 10552388, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14385 + } + }, + { + "ph": "s", "id": 78, "pid": 2338708, "tid": 2338708, "ts": 6345936928823.962, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936928870.486, "dur": 321.733, + "args": { + "External id": 975342,"Sequence number": 10552389, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14386 + } + }, + { + "ph": "s", "id": 77, "pid": 2338708, "tid": 2338708, "ts": 6345936928870.486, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936928890.398, "dur": 5.630, + "args": { + "External id": 975343,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936928893.871, "dur": 1.940, + "args": { + "External id": 975344,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936928900.093, "dur": 3.765, + "args": { + "External id": 975345,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936928901.408, "dur": 2.258, + "args": { + "External id": 975346,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936928902.139, "dur": 1.388, + "args": { + "External id": 975347,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936928912.903, "dur": 16.587, + "args": { + "External id": 975348,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936928922.582, "dur": 6.473, + "args": { + "External id": 975349,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936928939.304, "dur": 2.607, + "args": { + "External id": 975350,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936928946.532, "dur": 4.650, + "args": { + "External id": 975351,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936929164.689, "dur": 5.669, + "args": { + "External id": 975352,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936929166.051, "dur": 3.939, + "args": { + "External id": 975353,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936929173.587, "dur": 2.173, + "args": { + "External id": 975354,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936929174.764, "dur": 0.884, + "args": { + "External id": 975355,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936929216.075, "dur": 147.445, + "args": { + "External id": 975356,"Sequence number": 10552390, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936929217.553, "dur": 13.238, + "args": { + "External id": 975357,"Sequence number": 10552390, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14401 + } + }, + { + "ph": "s", "id": 76, "pid": 2338708, "tid": 2338708, "ts": 6345936929217.553, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936929222.685, "dur": 6.456, + "args": { + "External id": 975358,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936929225.369, "dur": 3.331, + "args": { + "External id": 975359,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936929232.001, "dur": 131.157, + "args": { + "External id": 975360,"Sequence number": 10552391, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936929252.038, "dur": 6.497, + "args": { + "External id": 975361,"Sequence number": 10552391, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936929255.834, "dur": 2.573, + "args": { + "External id": 975362,"Sequence number": 10552391, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14406 + } + }, + { + "ph": "s", "id": 75, "pid": 2338708, "tid": 2338708, "ts": 6345936929255.834, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936929259.727, "dur": 96.287, + "args": { + "External id": 975363,"Sequence number": 10552392, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14407 + } + }, + { + "ph": "s", "id": 74, "pid": 2338708, "tid": 2338708, "ts": 6345936929259.727, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936929359.212, "dur": 3.204, + "args": { + "External id": 975364,"Sequence number": 10552393, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14408 + } + }, + { + "ph": "s", "id": 73, "pid": 2338708, "tid": 2338708, "ts": 6345936929359.212, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936929374.047, "dur": 84.491, + "args": { + "External id": 975365,"Sequence number": 10552394, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936929374.674, "dur": 10.091, + "args": { + "External id": 975366,"Sequence number": 10552394, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14410 + } + }, + { + "ph": "s", "id": 72, "pid": 2338708, "tid": 2338708, "ts": 6345936929374.674, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936929378.459, "dur": 4.879, + "args": { + "External id": 975367,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936929380.242, "dur": 2.887, + "args": { + "External id": 975368,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936929385.534, "dur": 72.643, + "args": { + "External id": 975369,"Sequence number": 10552395, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936929386.622, "dur": 8.378, + "args": { + "External id": 975370,"Sequence number": 10552395, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936929387.463, "dur": 7.353, + "args": { + "External id": 975371,"Sequence number": 10552395, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14415 + } + }, + { + "ph": "s", "id": 71, "pid": 2338708, "tid": 2338708, "ts": 6345936929387.463, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936929395.683, "dur": 53.452, + "args": { + "External id": 975372,"Sequence number": 10552396, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14416 + } + }, + { + "ph": "s", "id": 70, "pid": 2338708, "tid": 2338708, "ts": 6345936929395.683, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936929451.380, "dur": 5.935, + "args": { + "External id": 975373,"Sequence number": 10552397, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14417 + } + }, + { + "ph": "s", "id": 69, "pid": 2338708, "tid": 2338708, "ts": 6345936929451.380, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936929485.378, "dur": 176.634, + "args": { + "External id": 975374,"Sequence number": 10552398, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14418 + } + }, + { + "ph": "s", "id": 68, "pid": 2338708, "tid": 2338708, "ts": 6345936929485.378, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936929535.231, "dur": 4.902, + "args": { + "External id": 975375,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936929576.658, "dur": 70.591, + "args": { + "External id": 975376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936929577.495, "dur": 5.539, + "args": { + "External id": 975377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936929578.840, "dur": 3.296, + "args": { + "External id": 975378,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936929580.705, "dur": 1.177, + "args": { + "External id": 975379,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936929583.809, "dur": 63.006, + "args": { + "External id": 975380,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936929585.175, "dur": 4.566, + "args": { + "External id": 975381,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936929588.330, "dur": 1.265, + "args": { + "External id": 975382,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936929590.446, "dur": 50.372, + "args": { + "External id": 975383,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936929643.142, "dur": 2.749, + "args": { + "External id": 975384,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345936929671.989, "dur": 28.902, + "args": { + "External id": 975385,"Sequence number": 10552399, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14429 + } + }, + { + "ph": "s", "id": 67, "pid": 2338708, "tid": 2338708, "ts": 6345936929671.989, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936929742.616, "dur": 229.062, + "args": { + "External id": 975386,"Sequence number": 10552400, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 14430 + } + }, + { + "ph": "s", "id": 66, "pid": 2338708, "tid": 2338708, "ts": 6345936929742.616, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936929767.025, "dur": 2.855, + "args": { + "External id": 975387,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936929767.742, "dur": 1.935, + "args": { + "External id": 975388,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936929780.252, "dur": 10.653, + "args": { + "External id": 975389,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936929785.624, "dur": 4.852, + "args": { + "External id": 975390,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936929799.117, "dur": 4.659, + "args": { + "External id": 975391,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936929956.718, "dur": 3.058, + "args": { + "External id": 975392,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936929957.571, "dur": 2.019, + "args": { + "External id": 975393,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936929993.518, "dur": 186.501, + "args": { + "External id": 975394,"Sequence number": 10552401, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936929994.770, "dur": 9.979, + "args": { + "External id": 975395,"Sequence number": 10552401, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14439 + } + }, + { + "ph": "s", "id": 65, "pid": 2338708, "tid": 2338708, "ts": 6345936929994.770, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936929997.205, "dur": 5.766, + "args": { + "External id": 975396,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936930001.300, "dur": 1.331, + "args": { + "External id": 975397,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936930005.520, "dur": 174.011, + "args": { + "External id": 975398,"Sequence number": 10552402, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936930006.974, "dur": 26.706, + "args": { + "External id": 975399,"Sequence number": 10552402, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936930028.938, "dur": 4.392, + "args": { + "External id": 975400,"Sequence number": 10552402, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14444 + } + }, + { + "ph": "s", "id": 64, "pid": 2338708, "tid": 2338708, "ts": 6345936930028.938, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936930041.690, "dur": 128.957, + "args": { + "External id": 975401,"Sequence number": 10552403, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14445 + } + }, + { + "ph": "s", "id": 63, "pid": 2338708, "tid": 2338708, "ts": 6345936930041.690, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936930174.897, "dur": 3.692, + "args": { + "External id": 975402,"Sequence number": 10552404, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14446 + } + }, + { + "ph": "s", "id": 62, "pid": 2338708, "tid": 2338708, "ts": 6345936930174.897, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936930193.430, "dur": 83.028, + "args": { + "External id": 975403,"Sequence number": 10552405, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936930194.541, "dur": 9.578, + "args": { + "External id": 975404,"Sequence number": 10552405, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14448 + } + }, + { + "ph": "s", "id": 61, "pid": 2338708, "tid": 2338708, "ts": 6345936930194.541, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936930196.564, "dur": 6.189, + "args": { + "External id": 975405,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936930201.136, "dur": 1.369, + "args": { + "External id": 975406,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936930205.022, "dur": 71.139, + "args": { + "External id": 975407,"Sequence number": 10552406, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936930206.184, "dur": 4.080, + "args": { + "External id": 975408,"Sequence number": 10552406, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936930207.270, "dur": 2.875, + "args": { + "External id": 975409,"Sequence number": 10552406, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14453 + } + }, + { + "ph": "s", "id": 60, "pid": 2338708, "tid": 2338708, "ts": 6345936930207.270, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936930211.106, "dur": 58.445, + "args": { + "External id": 975410,"Sequence number": 10552407, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14454 + } + }, + { + "ph": "s", "id": 59, "pid": 2338708, "tid": 2338708, "ts": 6345936930211.106, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936930271.829, "dur": 3.676, + "args": { + "External id": 975411,"Sequence number": 10552408, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14455 + } + }, + { + "ph": "s", "id": 58, "pid": 2338708, "tid": 2338708, "ts": 6345936930271.829, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936930284.839, "dur": 66.675, + "args": { + "External id": 975412,"Sequence number": 10552409, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 14456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936930285.491, "dur": 7.083, + "args": { + "External id": 975413,"Sequence number": 10552409, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 14457 + } + }, + { + "ph": "s", "id": 57, "pid": 2338708, "tid": 2338708, "ts": 6345936930285.491, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936930287.104, "dur": 4.361, + "args": { + "External id": 975414,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 14458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936930288.964, "dur": 2.247, + "args": { + "External id": 975415,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 14459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936930293.393, "dur": 57.773, + "args": { + "External id": 975416,"Sequence number": 10552410, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 14460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936930296.963, "dur": 4.970, + "args": { + "External id": 975417,"Sequence number": 10552410, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936930297.977, "dur": 3.794, + "args": { + "External id": 975418,"Sequence number": 10552410, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14462 + } + }, + { + "ph": "s", "id": 56, "pid": 2338708, "tid": 2338708, "ts": 6345936930297.977, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936930302.531, "dur": 42.512, + "args": { + "External id": 975419,"Sequence number": 10552411, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 14463 + } + }, + { + "ph": "s", "id": 55, "pid": 2338708, "tid": 2338708, "ts": 6345936930302.531, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936930347.147, "dur": 3.574, + "args": { + "External id": 975420,"Sequence number": 10552412, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 14464 + } + }, + { + "ph": "s", "id": 54, "pid": 2338708, "tid": 2338708, "ts": 6345936930347.147, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936930374.490, "dur": 4.037, + "args": { + "External id": 975421,"Sequence number": 10552413, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936930375.378, "dur": 2.949, + "args": { + "External id": 975422,"Sequence number": 10552413, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14466 + } + }, + { + "ph": "s", "id": 53, "pid": 2338708, "tid": 2338708, "ts": 6345936930375.378, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936930387.006, "dur": 4.143, + "args": { + "External id": 975423,"Sequence number": 10552414, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936930387.745, "dur": 3.261, + "args": { + "External id": 975424,"Sequence number": 10552414, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14468 + } + }, + { + "ph": "s", "id": 52, "pid": 2338708, "tid": 2338708, "ts": 6345936930387.745, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936930396.101, "dur": 5.186, + "args": { + "External id": 975425,"Sequence number": 10552415, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936930399.364, "dur": 1.781, + "args": { + "External id": 975426,"Sequence number": 10552415, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 14470 + } + }, + { + "ph": "s", "id": 51, "pid": 2338708, "tid": 2338708, "ts": 6345936930399.364, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936930437.376, "dur": 190.113, + "args": { + "External id": 975427,"Sequence number": 10552416, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14471 + } + }, + { + "ph": "s", "id": 50, "pid": 2338708, "tid": 2338708, "ts": 6345936930437.376, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936930461.593, "dur": 12.005, + "args": { + "External id": 975428,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936930465.073, "dur": 7.996, + "args": { + "External id": 975429,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936930642.361, "dur": 127.652, + "args": { + "External id": 975430,"Sequence number": 10552417, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 14474 + } + }, + { + "ph": "s", "id": 49, "pid": 2338708, "tid": 2338708, "ts": 6345936930642.361, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936930658.732, "dur": 7.277, + "args": { + "External id": 975431,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 14475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936930661.381, "dur": 4.244, + "args": { + "External id": 975432,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338708, "tid": 2338708, + "ts": 6345936930805.077, "dur": 227.549, + "args": { + "External id": 975433,"Sequence number": 10552418, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 14477 + } + }, + { + "ph": "s", "id": 48, "pid": 2338708, "tid": 2338708, "ts": 6345936930805.077, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345936930836.989, "dur": 144.424, + "args": { + "External id": 975434,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 14478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936930898.321, "dur": 7.391, + "args": { + "External id": 975435,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 14479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936930900.635, "dur": 4.539, + "args": { + "External id": 975436,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936930908.879, "dur": 4.027, + "args": { + "External id": 975437,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936930914.400, "dur": 1.699, + "args": { + "External id": 975438,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936930919.039, "dur": 3.736, + "args": { + "External id": 975439,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2338708, + "ts": 6345936930995.432, "dur": 5.058, + "args": { + "External id": 975440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 14484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936931041.364, "dur": 6.574, + "args": { + "External id": 975441,"Sequence number": 10552419, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931042.711, "dur": 5.065, + "args": { + "External id": 975442,"Sequence number": 10552419, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 14486 + } + }, + { + "ph": "s", "id": 47, "pid": 2338708, "tid": 2338708, "ts": 6345936931042.711, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936931107.036, "dur": 133.188, + "args": { + "External id": 975443,"Sequence number": 10552420, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 14487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936931108.666, "dur": 13.566, + "args": { + "External id": 975444,"Sequence number": 10552420, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 14488 + } + }, + { + "ph": "s", "id": 46, "pid": 2338708, "tid": 2338708, "ts": 6345936931108.666, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936931115.383, "dur": 5.141, + "args": { + "External id": 975445,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 14489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936931118.233, "dur": 1.841, + "args": { + "External id": 975446,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 14490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936931123.896, "dur": 115.923, + "args": { + "External id": 975447,"Sequence number": 10552421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 14491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936931125.961, "dur": 5.529, + "args": { + "External id": 975448,"Sequence number": 10552421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931126.752, "dur": 4.537, + "args": { + "External id": 975449,"Sequence number": 10552421, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14493 + } + }, + { + "ph": "s", "id": 45, "pid": 2338708, "tid": 2338708, "ts": 6345936931126.752, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936931132.546, "dur": 98.294, + "args": { + "External id": 975450,"Sequence number": 10552422, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 14494 + } + }, + { + "ph": "s", "id": 44, "pid": 2338708, "tid": 2338708, "ts": 6345936931132.546, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931233.732, "dur": 5.159, + "args": { + "External id": 975451,"Sequence number": 10552423, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14495 + } + }, + { + "ph": "s", "id": 43, "pid": 2338708, "tid": 2338708, "ts": 6345936931233.732, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936931282.747, "dur": 254.898, + "args": { + "External id": 975452,"Sequence number": 10552424, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 14496 + } + }, + { + "ph": "s", "id": 42, "pid": 2338708, "tid": 2338708, "ts": 6345936931282.747, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936931307.428, "dur": 8.371, + "args": { + "External id": 975453,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931311.372, "dur": 4.235, + "args": { + "External id": 975454,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338708, "tid": 2338708, + "ts": 6345936931320.207, "dur": 3.115, + "args": { + "External id": 975455,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 14499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936931321.192, "dur": 1.951, + "args": { + "External id": 975456,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931321.994, "dur": 1.046, + "args": { + "External id": 975457,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936931333.314, "dur": 8.208, + "args": { + "External id": 975458,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936931335.639, "dur": 5.521, + "args": { + "External id": 975459,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936931351.243, "dur": 3.096, + "args": { + "External id": 975460,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936931358.644, "dur": 3.633, + "args": { + "External id": 975461,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936931512.196, "dur": 3.576, + "args": { + "External id": 975462,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931513.301, "dur": 2.254, + "args": { + "External id": 975463,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936931518.474, "dur": 1.942, + "args": { + "External id": 975464,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931519.212, "dur": 1.069, + "args": { + "External id": 975465,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936931558.432, "dur": 103.443, + "args": { + "External id": 975466,"Sequence number": 10552425, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936931559.686, "dur": 8.013, + "args": { + "External id": 975467,"Sequence number": 10552425, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14511 + } + }, + { + "ph": "s", "id": 41, "pid": 2338708, "tid": 2338708, "ts": 6345936931559.686, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936931562.037, "dur": 4.301, + "args": { + "External id": 975468,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936931564.318, "dur": 1.729, + "args": { + "External id": 975469,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936931568.875, "dur": 92.671, + "args": { + "External id": 975470,"Sequence number": 10552426, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936931570.586, "dur": 6.222, + "args": { + "External id": 975471,"Sequence number": 10552426, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931574.319, "dur": 2.366, + "args": { + "External id": 975472,"Sequence number": 10552426, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14516 + } + }, + { + "ph": "s", "id": 40, "pid": 2338708, "tid": 2338708, "ts": 6345936931574.319, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936931577.539, "dur": 78.821, + "args": { + "External id": 975473,"Sequence number": 10552427, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14517 + } + }, + { + "ph": "s", "id": 39, "pid": 2338708, "tid": 2338708, "ts": 6345936931577.539, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931658.507, "dur": 2.436, + "args": { + "External id": 975474,"Sequence number": 10552428, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14518 + } + }, + { + "ph": "s", "id": 38, "pid": 2338708, "tid": 2338708, "ts": 6345936931658.507, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936931670.887, "dur": 81.707, + "args": { + "External id": 975475,"Sequence number": 10552429, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 14519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936931671.680, "dur": 11.419, + "args": { + "External id": 975476,"Sequence number": 10552429, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 14520 + } + }, + { + "ph": "s", "id": 37, "pid": 2338708, "tid": 2338708, "ts": 6345936931671.680, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936931677.015, "dur": 4.709, + "args": { + "External id": 975477,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 14521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936931678.548, "dur": 2.974, + "args": { + "External id": 975478,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 14522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936931683.831, "dur": 68.468, + "args": { + "External id": 975479,"Sequence number": 10552430, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 14523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936931684.813, "dur": 7.272, + "args": { + "External id": 975480,"Sequence number": 10552430, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931688.408, "dur": 3.540, + "args": { + "External id": 975481,"Sequence number": 10552430, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14525 + } + }, + { + "ph": "s", "id": 36, "pid": 2338708, "tid": 2338708, "ts": 6345936931688.408, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936931692.860, "dur": 52.575, + "args": { + "External id": 975482,"Sequence number": 10552431, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 14526 + } + }, + { + "ph": "s", "id": 35, "pid": 2338708, "tid": 2338708, "ts": 6345936931692.860, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931747.666, "dur": 4.175, + "args": { + "External id": 975483,"Sequence number": 10552432, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 14527 + } + }, + { + "ph": "s", "id": 34, "pid": 2338708, "tid": 2338708, "ts": 6345936931747.666, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936931775.378, "dur": 172.112, + "args": { + "External id": 975484,"Sequence number": 10552433, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14528 + } + }, + { + "ph": "s", "id": 33, "pid": 2338708, "tid": 2338708, "ts": 6345936931775.378, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936931822.820, "dur": 4.824, + "args": { + "External id": 975485,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345936931863.589, "dur": 69.517, + "args": { + "External id": 975486,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 14530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345936931864.346, "dur": 5.121, + "args": { + "External id": 975487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 14531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936931865.826, "dur": 2.613, + "args": { + "External id": 975488,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 14532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936931867.251, "dur": 0.881, + "args": { + "External id": 975489,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 14533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345936931870.495, "dur": 62.224, + "args": { + "External id": 975490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 14534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345936931872.142, "dur": 4.899, + "args": { + "External id": 975491,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931875.587, "dur": 1.325, + "args": { + "External id": 975492,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 14536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345936931877.909, "dur": 50.709, + "args": { + "External id": 975493,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 14537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345936931930.959, "dur": 1.085, + "args": { + "External id": 975494,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345936931958.039, "dur": 27.167, + "args": { + "External id": 975495,"Sequence number": 10552434, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 14539 + } + }, + { + "ph": "s", "id": 32, "pid": 2338708, "tid": 2338708, "ts": 6345936931958.039, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338708, "tid": 2338708, + "ts": 6345936932029.270, "dur": 96.580, + "args": { + "External id": 975496,"Sequence number": 10552435, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 14540 + } + }, + { + "ph": "s", "id": 31, "pid": 2338708, "tid": 2338708, "ts": 6345936932029.270, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338708, "tid": 2338708, + "ts": 6345936932041.187, "dur": 76.036, + "args": { + "External id": 975497,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 14541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936932121.186, "dur": 1.728, + "args": { + "External id": 975498,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 16384], []], "Ev Idx": 14542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345936932168.726, "dur": 51.666, + "args": { + "External id": 975499,"Record function id": 0, "Ev Idx": 14543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2338708, "tid": 2338708, + "ts": 6345936932221.780, "dur": 229.953, + "args": { + "External id": 975500,"Record function id": 0, "Ev Idx": 14544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936932266.291, "dur": 175.148, + "args": { + "External id": 975501,"Sequence number": 10552436, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [67108864, 16384, 4096, 1]], "Input Dims": [[4096], [8, 4096, 4, 4096]], "Ev Idx": 14545 + } + }, + { + "ph": "s", "id": 30, "pid": 2338708, "tid": 2338708, "ts": 6345936932266.291, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345936932349.297, "dur": 43.466, + "args": { + "External id": 975502,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [131072, 4096], [4096], [131072], [], [], [], [], [], [], [], [], []], "Ev Idx": 14546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345936932552.019, "dur": 42.855, + "args": { + "External id": 975503,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936932555.134, "dur": 7.071, + "args": { + "External id": 975504,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345936932565.224, "dur": 29.305, + "args": { + "External id": 975505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345936932568.172, "dur": 25.854, + "args": { + "External id": 975506,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345936932600.605, "dur": 22.182, + "args": { + "External id": 975507,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936932601.732, "dur": 3.119, + "args": { + "External id": 975508,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345936932608.053, "dur": 14.438, + "args": { + "External id": 975509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345936932609.014, "dur": 12.993, + "args": { + "External id": 975510,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345936932626.086, "dur": 21.253, + "args": { + "External id": 975511,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936932627.257, "dur": 4.625, + "args": { + "External id": 975512,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345936932632.594, "dur": 14.445, + "args": { + "External id": 975513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 14557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345936932633.029, "dur": 13.658, + "args": { + "External id": 975514,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 14558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936932658.705, "dur": 0.874, + "args": { + "External id": 975515,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 14559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338708, "tid": 2338708, + "ts": 6345936932668.834, "dur": 12.811, + "args": { + "External id": 975516,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 14560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932677.144, "dur": 2.407, + "args": { + "External id": 975517,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 14561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936932689.446, "dur": 7.520, + "args": { + "External id": 975518,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932693.786, "dur": 1.180, + "args": { + "External id": 975519,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936932698.350, "dur": 2.968, + "args": { + "External id": 975520,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932700.034, "dur": 0.410, + "args": { + "External id": 975521,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936932702.927, "dur": 3.313, + "args": { + "External id": 975522,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 14566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932704.958, "dur": 0.634, + "args": { + "External id": 975523,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 14567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936932709.911, "dur": 5.178, + "args": { + "External id": 975524,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 14568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932711.695, "dur": 2.703, + "args": { + "External id": 975525,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 14569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936932716.417, "dur": 3.403, + "args": { + "External id": 975526,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 14570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932718.305, "dur": 0.515, + "args": { + "External id": 975527,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 14571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936932721.045, "dur": 2.633, + "args": { + "External id": 975528,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4096, 4], [], [], [], []], "Ev Idx": 14572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932722.592, "dur": 0.434, + "args": { + "External id": 975529,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 14573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345936932728.438, "dur": 5.281, + "args": { + "External id": 975530,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4096, 4], [], []], "Ev Idx": 14574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932732.139, "dur": 0.700, + "args": { + "External id": 975531,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 14575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936932738.619, "dur": 3.187, + "args": { + "External id": 975532,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 14576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932740.373, "dur": 0.734, + "args": { + "External id": 975533,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 14577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345936932748.026, "dur": 7.944, + "args": { + "External id": 975534,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 14578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932754.099, "dur": 0.644, + "args": { + "External id": 975535,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 14579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936932757.346, "dur": 2.794, + "args": { + "External id": 975536,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 14580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932758.977, "dur": 0.535, + "args": { + "External id": 975537,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 14581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936932763.395, "dur": 8.563, + "args": { + "External id": 975538,"Sequence number": 10552437, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 14582 + } + }, + { + "ph": "s", "id": 29, "pid": 2338708, "tid": 2338708, "ts": 6345936932763.395, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932769.486, "dur": 0.703, + "args": { + "External id": 975539,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936932773.135, "dur": 6.936, + "args": { + "External id": 975540,"Sequence number": 10552438, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 14584 + } + }, + { + "ph": "s", "id": 28, "pid": 2338708, "tid": 2338708, "ts": 6345936932773.135, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932776.635, "dur": 2.424, + "args": { + "External id": 975541,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345936932781.066, "dur": 7.909, + "args": { + "External id": 975542,"Sequence number": 10552439, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 14586 + } + }, + { + "ph": "s", "id": 27, "pid": 2338708, "tid": 2338708, "ts": 6345936932781.066, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932787.296, "dur": 0.496, + "args": { + "External id": 975543,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 14587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345936932790.182, "dur": 4.481, + "args": { + "External id": 975544,"Sequence number": 10552440, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 14588 + } + }, + { + "ph": "s", "id": 26, "pid": 2338708, "tid": 2338708, "ts": 6345936932790.182, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932792.994, "dur": 0.767, + "args": { + "External id": 975545,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 14589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345936932799.120, "dur": 43.012, + "args": { + "External id": 975546,"Sequence number": 10552441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345936932801.457, "dur": 40.355, + "args": { + "External id": 975547,"Sequence number": 10552441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936932804.515, "dur": 7.280, + "args": { + "External id": 975548,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 14592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936932806.917, "dur": 4.234, + "args": { + "External id": 975549,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936932813.354, "dur": 27.758, + "args": { + "External id": 975550,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 14594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936932872.045, "dur": 6.921, + "args": { + "External id": 975551,"Sequence number": 10552441, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 14595 + } + }, + { + "ph": "s", "id": 25, "pid": 2338708, "tid": 2338708, "ts": 6345936932872.045, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345936932884.349, "dur": 1.441, + "args": { + "External id": 975552,"Sequence number": 10552442, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 14596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345936932915.861, "dur": 125951.031, + "args": { + "External id": 975553,"Sequence number": 10552442, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 14597 + } + }, + { + "ph": "s", "id": 24, "pid": 2338708, "tid": 2338708, "ts": 6345936932915.861, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345936932934.285, "dur": 29.954, + "args": { + "External id": 975554,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345936932935.105, "dur": 28.921, + "args": { + "External id": 975555,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936932936.394, "dur": 5.620, + "args": { + "External id": 975556,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936932937.792, "dur": 3.831, + "args": { + "External id": 975557,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936932942.879, "dur": 20.509, + "args": { + "External id": 975558,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 14602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936932985.162, "dur": 54.463, + "args": { + "External id": 975559,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936932986.299, "dur": 7.439, + "args": { + "External id": 975560,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 14604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936932989.284, "dur": 4.015, + "args": { + "External id": 975561,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345936932994.543, "dur": 44.833, + "args": { + "External id": 975562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 14606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345936932998.463, "dur": 39.976, + "args": { + "External id": 975563,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 14607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936933045.986, "dur": 72.636, + "args": { + "External id": 975564,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345936933046.985, "dur": 7.956, + "args": { + "External id": 975565,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936933048.727, "dur": 5.886, + "args": { + "External id": 975566,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345936933096.742, "dur": 21.646, + "args": { + "External id": 975567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345936933098.941, "dur": 18.882, + "args": { + "External id": 975568,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 14612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345936933126.484, "dur": 25.726, + "args": { + "External id": 975569,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 14613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345936933130.016, "dur": 3.883, + "args": { + "External id": 975570,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345936933134.886, "dur": 16.976, + "args": { + "External id": 975571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 14615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345936933138.083, "dur": 13.323, + "args": { + "External id": 975572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2338708, + "ts": 6345936933160.709, "dur": 32.910, + "args": { + "External id": 975573,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345936933198.420, "dur": 76.640, + "args": { + "External id": 975574,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345936933202.593, "dur": 71.954, + "args": { + "External id": 975575,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936933209.660, "dur": 1.446, + "args": { + "External id": 975576,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 14620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345936933212.531, "dur": 39.892, + "args": { + "External id": 975577,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345936933214.350, "dur": 37.836, + "args": { + "External id": 975578,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 14622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345936933228.844, "dur": 3.421, + "args": { + "External id": 975579,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345936933233.474, "dur": 18.257, + "args": { + "External id": 975580,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 14624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2338708, + "ts": 6345936933283.700, "dur": 118658.138, + "args": { + "External id": 975581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2338708, + "ts": 6345936933285.893, "dur": 118644.553, + "args": { + "External id": 975582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937052018.601, "dur": 56.671, + "args": { + "External id": 975583,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937052043.935, "dur": 4.787, + "args": { + "External id": 975584,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937052090.514, "dur": 243.745, + "args": { + "External id": 975585,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937052094.397, "dur": 15.398, + "args": { + "External id": 975586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937052100.792, "dur": 7.165, + "args": { + "External id": 975587,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937052105.648, "dur": 1.991, + "args": { + "External id": 975588,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937052112.589, "dur": 220.581, + "args": { + "External id": 975589,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937052125.763, "dur": 205.827, + "args": { + "External id": 975590,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937052345.891, "dur": 8.701, + "args": { + "External id": 975591,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937052350.377, "dur": 1.627, + "args": { + "External id": 975592,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937052375.446, "dur": 9.184, + "args": { + "External id": 975593,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937052411.119, "dur": 18.305, + "args": { + "External id": 975594,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937052416.144, "dur": 12.960, + "args": { + "External id": 975595,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937052702.305, "dur": 382.566, + "args": { + "External id": 975596,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937052710.507, "dur": 3.724, + "args": { + "External id": 975597,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937052717.602, "dur": 334.829, + "args": { + "External id": 975598,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937052721.477, "dur": 0.874, + "args": { + "External id": 975599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937052725.662, "dur": 46.697, + "args": { + "External id": 975600,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937052776.179, "dur": 9.486, + "args": { + "External id": 975601,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937052784.302, "dur": 0.783, + "args": { + "External id": 975602,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937052788.284, "dur": 40.592, + "args": { + "External id": 975603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937052790.244, "dur": 2.159, + "args": { + "External id": 975604,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937052795.871, "dur": 32.629, + "args": { + "External id": 975605,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937052805.530, "dur": 5.598, + "args": { + "External id": 975606,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937052837.662, "dur": 34.916, + "args": { + "External id": 975607,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937052876.224, "dur": 27.238, + "args": { + "External id": 975608,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937052909.055, "dur": 21.311, + "args": { + "External id": 975609,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937052933.824, "dur": 20.929, + "args": { + "External id": 975610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937052958.877, "dur": 34.176, + "args": { + "External id": 975611,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937052962.411, "dur": 2.629, + "args": { + "External id": 975612,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937052972.093, "dur": 0.810, + "args": { + "External id": 975613,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937052996.200, "dur": 36.741, + "args": { + "External id": 975614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937053036.641, "dur": 14.147, + "args": { + "External id": 975615,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937053097.812, "dur": 4.005, + "args": { + "External id": 975616,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937053113.588, "dur": 5.569, + "args": { + "External id": 975617,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937053116.640, "dur": 1.007, + "args": { + "External id": 975618,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937053245.300, "dur": 93.042, + "args": { + "External id": 975619,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937053346.247, "dur": 13.930, + "args": { + "External id": 975620,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937053353.777, "dur": 4.886, + "args": { + "External id": 975621,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937053363.207, "dur": 48.446, + "args": { + "External id": 975622,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937053419.001, "dur": 7.594, + "args": { + "External id": 975623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937053421.319, "dur": 4.470, + "args": { + "External id": 975624,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937053424.034, "dur": 1.468, + "args": { + "External id": 975625,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937053432.097, "dur": 60.381, + "args": { + "External id": 975626,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937053437.676, "dur": 54.144, + "args": { + "External id": 975627,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937053499.246, "dur": 20.337, + "args": { + "External id": 975628,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937053527.411, "dur": 4.466, + "args": { + "External id": 975629,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937053530.022, "dur": 0.677, + "args": { + "External id": 975630,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937053537.735, "dur": 51.235, + "args": { + "External id": 975631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937053538.906, "dur": 7.474, + "args": { + "External id": 975632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937053540.362, "dur": 5.385, + "args": { + "External id": 975633,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937053544.844, "dur": 0.749, + "args": { + "External id": 975634,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937053547.309, "dur": 41.287, + "args": { + "External id": 975635,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937053548.208, "dur": 39.782, + "args": { + "External id": 975636,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937053594.457, "dur": 5.038, + "args": { + "External id": 975637,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937053597.359, "dur": 0.659, + "args": { + "External id": 975638,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937053606.728, "dur": 2.325, + "args": { + "External id": 975639,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937053623.473, "dur": 12.099, + "args": { + "External id": 975640,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937053626.160, "dur": 9.063, + "args": { + "External id": 975641,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937053744.314, "dur": 222.868, + "args": { + "External id": 975642,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937053746.882, "dur": 6.246, + "args": { + "External id": 975643,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937053755.296, "dur": 211.204, + "args": { + "External id": 975644,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937053757.237, "dur": 0.351, + "args": { + "External id": 975645,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937053759.004, "dur": 28.579, + "args": { + "External id": 975646,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937053793.621, "dur": 4.193, + "args": { + "External id": 975647,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937053796.691, "dur": 0.866, + "args": { + "External id": 975648,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937053799.067, "dur": 27.526, + "args": { + "External id": 975649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937053800.529, "dur": 1.684, + "args": { + "External id": 975650,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937053803.891, "dur": 22.378, + "args": { + "External id": 975651,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937053810.121, "dur": 2.757, + "args": { + "External id": 975652,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937053828.277, "dur": 24.725, + "args": { + "External id": 975653,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937053855.130, "dur": 14.343, + "args": { + "External id": 975654,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937053872.348, "dur": 16.020, + "args": { + "External id": 975655,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937053889.952, "dur": 13.936, + "args": { + "External id": 975656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937053906.047, "dur": 27.890, + "args": { + "External id": 975657,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937053911.614, "dur": 3.132, + "args": { + "External id": 975658,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937053917.703, "dur": 0.515, + "args": { + "External id": 975659,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937053935.555, "dur": 13.360, + "args": { + "External id": 975660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937053950.222, "dur": 14.806, + "args": { + "External id": 975661,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937053974.407, "dur": 2.257, + "args": { + "External id": 975662,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937053987.551, "dur": 4.113, + "args": { + "External id": 975663,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937053990.327, "dur": 0.476, + "args": { + "External id": 975664,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937054130.711, "dur": 98.884, + "args": { + "External id": 975665,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937054235.996, "dur": 7.280, + "args": { + "External id": 975666,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937054240.147, "dur": 1.210, + "args": { + "External id": 975667,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937054244.837, "dur": 30.898, + "args": { + "External id": 975668,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937054281.364, "dur": 10.594, + "args": { + "External id": 975669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937054283.171, "dur": 8.026, + "args": { + "External id": 975670,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937054289.508, "dur": 1.415, + "args": { + "External id": 975671,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937054295.088, "dur": 45.794, + "args": { + "External id": 975672,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937054296.287, "dur": 44.110, + "args": { + "External id": 975673,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937054345.507, "dur": 16.233, + "args": { + "External id": 975674,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937054368.083, "dur": 5.026, + "args": { + "External id": 975675,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937054371.397, "dur": 0.704, + "args": { + "External id": 975676,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937054377.655, "dur": 50.316, + "args": { + "External id": 975677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937054382.824, "dur": 4.329, + "args": { + "External id": 975678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937054383.777, "dur": 2.747, + "args": { + "External id": 975679,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937054385.611, "dur": 0.765, + "args": { + "External id": 975680,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937054388.123, "dur": 39.477, + "args": { + "External id": 975681,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937054388.906, "dur": 38.258, + "args": { + "External id": 975682,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937054432.725, "dur": 10.290, + "args": { + "External id": 975683,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937054435.144, "dur": 6.656, + "args": { + "External id": 975684,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937054455.078, "dur": 2.213, + "args": { + "External id": 975685,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937054467.093, "dur": 7.743, + "args": { + "External id": 975686,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937054469.535, "dur": 5.041, + "args": { + "External id": 975687,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937054569.672, "dur": 214.080, + "args": { + "External id": 975688,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937054571.701, "dur": 2.288, + "args": { + "External id": 975689,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937054576.763, "dur": 206.475, + "args": { + "External id": 975690,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937054581.684, "dur": 0.379, + "args": { + "External id": 975691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937054583.351, "dur": 27.295, + "args": { + "External id": 975692,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937054613.158, "dur": 3.585, + "args": { + "External id": 975693,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937054615.741, "dur": 0.774, + "args": { + "External id": 975694,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937054617.829, "dur": 28.827, + "args": { + "External id": 975695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937054619.743, "dur": 2.033, + "args": { + "External id": 975696,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937054623.324, "dur": 23.000, + "args": { + "External id": 975697,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937054629.433, "dur": 3.478, + "args": { + "External id": 975698,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937054648.418, "dur": 27.762, + "args": { + "External id": 975699,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937054678.083, "dur": 15.478, + "args": { + "External id": 975700,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937054699.385, "dur": 14.683, + "args": { + "External id": 975701,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937054715.773, "dur": 14.527, + "args": { + "External id": 975702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937054732.337, "dur": 23.068, + "args": { + "External id": 975703,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937054735.063, "dur": 2.508, + "args": { + "External id": 975704,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937054739.981, "dur": 0.655, + "args": { + "External id": 975705,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937054756.952, "dur": 11.744, + "args": { + "External id": 975706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937054772.942, "dur": 9.396, + "args": { + "External id": 975707,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937054791.265, "dur": 1.828, + "args": { + "External id": 975708,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937054802.252, "dur": 4.090, + "args": { + "External id": 975709,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937054804.929, "dur": 0.584, + "args": { + "External id": 975710,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937054873.748, "dur": 51.870, + "args": { + "External id": 975711,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937054930.348, "dur": 5.268, + "args": { + "External id": 975712,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937054933.833, "dur": 0.755, + "args": { + "External id": 975713,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937054937.060, "dur": 24.883, + "args": { + "External id": 975714,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937054970.085, "dur": 5.518, + "args": { + "External id": 975715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937054971.598, "dur": 3.311, + "args": { + "External id": 975716,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937054973.668, "dur": 1.060, + "args": { + "External id": 975717,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937054978.437, "dur": 61.682, + "args": { + "External id": 975718,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937054979.526, "dur": 59.606, + "args": { + "External id": 975719,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937055045.701, "dur": 52.048, + "args": { + "External id": 975720,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937055106.805, "dur": 9.282, + "args": { + "External id": 975721,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937055113.625, "dur": 1.284, + "args": { + "External id": 975722,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937055121.393, "dur": 59.580, + "args": { + "External id": 975723,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937055122.452, "dur": 8.872, + "args": { + "External id": 975724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937055124.163, "dur": 6.439, + "args": { + "External id": 975725,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937055129.602, "dur": 0.850, + "args": { + "External id": 975726,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937055132.286, "dur": 48.273, + "args": { + "External id": 975727,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937055136.686, "dur": 43.016, + "args": { + "External id": 975728,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937055186.012, "dur": 4.764, + "args": { + "External id": 975729,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937055188.565, "dur": 0.836, + "args": { + "External id": 975730,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937055198.038, "dur": 1.702, + "args": { + "External id": 975731,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937055209.315, "dur": 10.650, + "args": { + "External id": 975732,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937055211.512, "dur": 8.202, + "args": { + "External id": 975733,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937055321.008, "dur": 200.638, + "args": { + "External id": 975734,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937055323.785, "dur": 2.533, + "args": { + "External id": 975735,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937055328.428, "dur": 192.610, + "args": { + "External id": 975736,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937055333.396, "dur": 0.311, + "args": { + "External id": 975737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937055335.156, "dur": 24.253, + "args": { + "External id": 975738,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937055361.795, "dur": 2.992, + "args": { + "External id": 975739,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937055363.817, "dur": 0.718, + "args": { + "External id": 975740,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937055365.765, "dur": 28.433, + "args": { + "External id": 975741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937055370.473, "dur": 2.105, + "args": { + "External id": 975742,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937055377.012, "dur": 16.920, + "args": { + "External id": 975743,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937055379.762, "dur": 2.402, + "args": { + "External id": 975744,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937055395.917, "dur": 21.628, + "args": { + "External id": 975745,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937055419.589, "dur": 13.218, + "args": { + "External id": 975746,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937055435.840, "dur": 13.638, + "args": { + "External id": 975747,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937055451.024, "dur": 11.658, + "args": { + "External id": 975748,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937055464.558, "dur": 23.459, + "args": { + "External id": 975749,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937055466.987, "dur": 2.063, + "args": { + "External id": 975750,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937055471.994, "dur": 0.746, + "args": { + "External id": 975751,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937055493.080, "dur": 13.080, + "args": { + "External id": 975752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937055507.635, "dur": 11.924, + "args": { + "External id": 975753,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937055528.745, "dur": 1.952, + "args": { + "External id": 975754,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937055539.523, "dur": 3.593, + "args": { + "External id": 975755,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937055541.936, "dur": 0.338, + "args": { + "External id": 975756,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937055608.843, "dur": 51.578, + "args": { + "External id": 975757,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937055665.616, "dur": 8.232, + "args": { + "External id": 975758,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937055668.688, "dur": 4.072, + "args": { + "External id": 975759,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937055679.207, "dur": 30.827, + "args": { + "External id": 975760,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937055714.707, "dur": 5.694, + "args": { + "External id": 975761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937055716.081, "dur": 3.651, + "args": { + "External id": 975762,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937055718.756, "dur": 0.752, + "args": { + "External id": 975763,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937055723.141, "dur": 43.130, + "args": { + "External id": 975764,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937055724.412, "dur": 41.253, + "args": { + "External id": 975765,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937055770.265, "dur": 15.465, + "args": { + "External id": 975766,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937055795.414, "dur": 4.501, + "args": { + "External id": 975767,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937055798.388, "dur": 0.746, + "args": { + "External id": 975768,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937055804.008, "dur": 52.840, + "args": { + "External id": 975769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937055805.005, "dur": 7.743, + "args": { + "External id": 975770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937055806.018, "dur": 6.196, + "args": { + "External id": 975771,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937055807.492, "dur": 4.608, + "args": { + "External id": 975772,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937055816.758, "dur": 39.648, + "args": { + "External id": 975773,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937055817.715, "dur": 38.002, + "args": { + "External id": 975774,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937055860.842, "dur": 4.333, + "args": { + "External id": 975775,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937055863.463, "dur": 0.581, + "args": { + "External id": 975776,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937055870.552, "dur": 1.237, + "args": { + "External id": 975777,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937055879.759, "dur": 10.476, + "args": { + "External id": 975778,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937055885.001, "dur": 4.991, + "args": { + "External id": 975779,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937055973.682, "dur": 274.796, + "args": { + "External id": 975780,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937055975.709, "dur": 5.806, + "args": { + "External id": 975781,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937055983.138, "dur": 264.523, + "args": { + "External id": 975782,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937055984.909, "dur": 0.249, + "args": { + "External id": 975783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937055986.603, "dur": 20.386, + "args": { + "External id": 975784,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937056027.101, "dur": 7.656, + "args": { + "External id": 975785,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937056033.316, "dur": 1.020, + "args": { + "External id": 975786,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937056036.134, "dur": 65.461, + "args": { + "External id": 975787,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937056041.533, "dur": 2.562, + "args": { + "External id": 975788,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937056045.658, "dur": 55.631, + "args": { + "External id": 975789,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056048.480, "dur": 35.776, + "args": { + "External id": 975790,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937056103.623, "dur": 29.265, + "args": { + "External id": 975791,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056134.413, "dur": 16.276, + "args": { + "External id": 975792,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937056154.210, "dur": 15.466, + "args": { + "External id": 975793,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056171.407, "dur": 13.882, + "args": { + "External id": 975794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937056187.947, "dur": 31.037, + "args": { + "External id": 975795,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056190.579, "dur": 2.550, + "args": { + "External id": 975796,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937056198.243, "dur": 4.605, + "args": { + "External id": 975797,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056220.294, "dur": 13.614, + "args": { + "External id": 975798,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056235.296, "dur": 11.273, + "args": { + "External id": 975799,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937056257.829, "dur": 2.478, + "args": { + "External id": 975800,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937056270.532, "dur": 4.373, + "args": { + "External id": 975801,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937056273.403, "dur": 0.614, + "args": { + "External id": 975802,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937056348.607, "dur": 60.979, + "args": { + "External id": 975803,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937056418.290, "dur": 4.655, + "args": { + "External id": 975804,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937056420.957, "dur": 0.699, + "args": { + "External id": 975805,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056424.605, "dur": 21.808, + "args": { + "External id": 975806,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937056450.924, "dur": 6.719, + "args": { + "External id": 975807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937056452.550, "dur": 4.340, + "args": { + "External id": 975808,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937056455.104, "dur": 1.573, + "args": { + "External id": 975809,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937056463.095, "dur": 42.091, + "args": { + "External id": 975810,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937056464.487, "dur": 40.167, + "args": { + "External id": 975811,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056509.559, "dur": 15.739, + "args": { + "External id": 975812,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937056531.026, "dur": 4.218, + "args": { + "External id": 975813,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937056533.390, "dur": 0.938, + "args": { + "External id": 975814,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937056539.632, "dur": 50.460, + "args": { + "External id": 975815,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937056540.791, "dur": 8.011, + "args": { + "External id": 975816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937056545.586, "dur": 2.451, + "args": { + "External id": 975817,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937056547.262, "dur": 0.651, + "args": { + "External id": 975818,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937056549.735, "dur": 39.970, + "args": { + "External id": 975819,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937056551.024, "dur": 38.137, + "args": { + "External id": 975820,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937056594.163, "dur": 7.614, + "args": { + "External id": 975821,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937056596.028, "dur": 4.673, + "args": { + "External id": 975822,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937056607.486, "dur": 1.824, + "args": { + "External id": 975823,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937056645.966, "dur": 10.164, + "args": { + "External id": 975824,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937056648.533, "dur": 7.294, + "args": { + "External id": 975825,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937056744.714, "dur": 201.387, + "args": { + "External id": 975826,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937056747.642, "dur": 2.177, + "args": { + "External id": 975827,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937056751.402, "dur": 194.156, + "args": { + "External id": 975828,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937056753.078, "dur": 0.560, + "args": { + "External id": 975829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937056758.727, "dur": 25.264, + "args": { + "External id": 975830,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937056785.618, "dur": 3.996, + "args": { + "External id": 975831,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937056788.598, "dur": 0.840, + "args": { + "External id": 975832,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937056790.944, "dur": 25.867, + "args": { + "External id": 975833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937056791.912, "dur": 1.756, + "args": { + "External id": 975834,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937056795.135, "dur": 21.415, + "args": { + "External id": 975835,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056800.874, "dur": 2.677, + "args": { + "External id": 975836,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937056818.397, "dur": 23.768, + "args": { + "External id": 975837,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056843.417, "dur": 13.808, + "args": { + "External id": 975838,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937056859.899, "dur": 14.878, + "args": { + "External id": 975839,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056875.932, "dur": 13.633, + "args": { + "External id": 975840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937056897.129, "dur": 22.059, + "args": { + "External id": 975841,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056899.024, "dur": 2.449, + "args": { + "External id": 975842,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937056904.320, "dur": 0.724, + "args": { + "External id": 975843,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056920.832, "dur": 12.023, + "args": { + "External id": 975844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937056933.930, "dur": 10.551, + "args": { + "External id": 975845,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937056952.638, "dur": 1.954, + "args": { + "External id": 975846,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937056963.629, "dur": 4.463, + "args": { + "External id": 975847,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937056966.725, "dur": 0.540, + "args": { + "External id": 975848,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937057086.570, "dur": 67.836, + "args": { + "External id": 975849,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937057160.933, "dur": 6.759, + "args": { + "External id": 975850,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937057165.136, "dur": 1.070, + "args": { + "External id": 975851,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937057169.374, "dur": 27.555, + "args": { + "External id": 975852,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937057202.630, "dur": 12.571, + "args": { + "External id": 975853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937057204.212, "dur": 10.281, + "args": { + "External id": 975854,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937057210.335, "dur": 3.844, + "args": { + "External id": 975855,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937057218.349, "dur": 45.197, + "args": { + "External id": 975856,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937057219.846, "dur": 43.187, + "args": { + "External id": 975857,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937057267.922, "dur": 16.097, + "args": { + "External id": 975858,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937057290.639, "dur": 4.591, + "args": { + "External id": 975859,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937057293.404, "dur": 0.807, + "args": { + "External id": 975860,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937057300.109, "dur": 47.580, + "args": { + "External id": 975861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937057303.678, "dur": 3.887, + "args": { + "External id": 975862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937057304.399, "dur": 2.615, + "args": { + "External id": 975863,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937057306.001, "dur": 0.885, + "args": { + "External id": 975864,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937057308.480, "dur": 38.797, + "args": { + "External id": 975865,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937057308.959, "dur": 37.738, + "args": { + "External id": 975866,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937057352.173, "dur": 7.424, + "args": { + "External id": 975867,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937057354.283, "dur": 3.905, + "args": { + "External id": 975868,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937057369.941, "dur": 1.901, + "args": { + "External id": 975869,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937057381.078, "dur": 7.639, + "args": { + "External id": 975870,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937057383.624, "dur": 4.801, + "args": { + "External id": 975871,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937057486.901, "dur": 246.728, + "args": { + "External id": 975872,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937057514.180, "dur": 2.855, + "args": { + "External id": 975873,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937057522.153, "dur": 210.758, + "args": { + "External id": 975874,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937057527.155, "dur": 0.285, + "args": { + "External id": 975875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937057528.821, "dur": 25.126, + "args": { + "External id": 975876,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937057555.694, "dur": 6.787, + "args": { + "External id": 975877,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937057558.544, "dur": 3.685, + "args": { + "External id": 975878,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937057563.581, "dur": 24.249, + "args": { + "External id": 975879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937057564.346, "dur": 1.481, + "args": { + "External id": 975880,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937057567.747, "dur": 19.813, + "args": { + "External id": 975881,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937057570.468, "dur": 3.305, + "args": { + "External id": 975882,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937057589.469, "dur": 24.681, + "args": { + "External id": 975883,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937057615.274, "dur": 15.651, + "args": { + "External id": 975884,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937057638.094, "dur": 16.477, + "args": { + "External id": 975885,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937057656.253, "dur": 13.708, + "args": { + "External id": 975886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937057672.218, "dur": 25.305, + "args": { + "External id": 975887,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937057674.037, "dur": 2.820, + "args": { + "External id": 975888,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937057678.824, "dur": 3.405, + "args": { + "External id": 975889,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937057699.005, "dur": 13.465, + "args": { + "External id": 975890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937057716.482, "dur": 15.385, + "args": { + "External id": 975891,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937057740.759, "dur": 1.888, + "args": { + "External id": 975892,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937057753.123, "dur": 3.796, + "args": { + "External id": 975893,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937057755.605, "dur": 0.404, + "args": { + "External id": 975894,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937057824.021, "dur": 55.687, + "args": { + "External id": 975895,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937057884.592, "dur": 4.571, + "args": { + "External id": 975896,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937057887.388, "dur": 0.651, + "args": { + "External id": 975897,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937057890.458, "dur": 24.573, + "args": { + "External id": 975898,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937057923.132, "dur": 5.736, + "args": { + "External id": 975899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937057924.608, "dur": 3.264, + "args": { + "External id": 975900,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937057926.715, "dur": 0.986, + "args": { + "External id": 975901,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937057931.739, "dur": 41.066, + "args": { + "External id": 975902,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937057932.811, "dur": 39.485, + "args": { + "External id": 975903,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937057976.486, "dur": 14.515, + "args": { + "External id": 975904,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937057996.196, "dur": 6.489, + "args": { + "External id": 975905,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058001.204, "dur": 0.520, + "args": { + "External id": 975906,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937058006.935, "dur": 109.229, + "args": { + "External id": 975907,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 14951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937058025.819, "dur": 5.647, + "args": { + "External id": 975908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 14952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937058027.642, "dur": 3.075, + "args": { + "External id": 975909,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 14953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058029.545, "dur": 0.877, + "args": { + "External id": 975910,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 14954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937058032.020, "dur": 83.662, + "args": { + "External id": 975911,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937058035.616, "dur": 78.796, + "args": { + "External id": 975912,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 14956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937058123.050, "dur": 4.922, + "args": { + "External id": 975913,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058125.890, "dur": 0.759, + "args": { + "External id": 975914,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937058134.865, "dur": 1.642, + "args": { + "External id": 975915,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 14959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937058145.066, "dur": 10.643, + "args": { + "External id": 975916,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 14960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937058147.615, "dur": 7.810, + "args": { + "External id": 975917,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937058251.179, "dur": 207.530, + "args": { + "External id": 975918,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937058257.059, "dur": 2.523, + "args": { + "External id": 975919,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937058261.085, "dur": 196.889, + "args": { + "External id": 975920,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 14964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937058262.368, "dur": 0.394, + "args": { + "External id": 975921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937058264.809, "dur": 23.637, + "args": { + "External id": 975922,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 14966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937058290.457, "dur": 6.801, + "args": { + "External id": 975923,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 14967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058296.336, "dur": 0.654, + "args": { + "External id": 975924,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 14968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937058298.630, "dur": 26.666, + "args": { + "External id": 975925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937058299.795, "dur": 1.492, + "args": { + "External id": 975926,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 14970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937058306.389, "dur": 18.664, + "args": { + "External id": 975927,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 14971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937058309.719, "dur": 2.777, + "args": { + "External id": 975928,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937058326.762, "dur": 22.939, + "args": { + "External id": 975929,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937058351.322, "dur": 17.127, + "args": { + "External id": 975930,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937058371.279, "dur": 15.615, + "args": { + "External id": 975931,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 14975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937058388.336, "dur": 13.976, + "args": { + "External id": 975932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 14976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937058404.069, "dur": 20.824, + "args": { + "External id": 975933,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 14977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937058406.063, "dur": 1.828, + "args": { + "External id": 975934,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 14978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058409.789, "dur": 0.659, + "args": { + "External id": 975935,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 14979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937058429.783, "dur": 13.670, + "args": { + "External id": 975936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 14980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937058444.622, "dur": 11.988, + "args": { + "External id": 975937,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 14981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937058465.406, "dur": 1.643, + "args": { + "External id": 975938,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 14982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937058475.806, "dur": 4.519, + "args": { + "External id": 975939,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 14983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058478.969, "dur": 0.510, + "args": { + "External id": 975940,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937058545.296, "dur": 54.039, + "args": { + "External id": 975941,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 14985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937058604.502, "dur": 4.057, + "args": { + "External id": 975942,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 14986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058606.830, "dur": 0.667, + "args": { + "External id": 975943,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 14987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937058613.871, "dur": 24.966, + "args": { + "External id": 975944,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 14988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937058643.730, "dur": 5.673, + "args": { + "External id": 975945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 14989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937058645.348, "dur": 3.412, + "args": { + "External id": 975946,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 14990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058647.426, "dur": 1.151, + "args": { + "External id": 975947,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 14991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937058651.943, "dur": 40.801, + "args": { + "External id": 975948,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937058652.873, "dur": 39.258, + "args": { + "External id": 975949,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 14993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937058696.639, "dur": 13.865, + "args": { + "External id": 975950,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 14994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937058720.412, "dur": 32.545, + "args": { + "External id": 975951,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 14995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937058723.937, "dur": 28.609, + "args": { + "External id": 975952,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 14996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058730.247, "dur": 6.444, + "args": { + "External id": 975953,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 14997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345937058760.758, "dur": 35.119, + "args": { + "External id": 975954,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 14998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345937058763.348, "dur": 32.263, + "args": { + "External id": 975955,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 14999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058769.286, "dur": 5.627, + "args": { + "External id": 975956,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937058776.282, "dur": 18.831, + "args": { + "External id": 975957,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937058816.699, "dur": 10.111, + "args": { + "External id": 975958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937058822.555, "dur": 3.871, + "args": { + "External id": 975959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937058828.231, "dur": 1.556, + "args": { + "External id": 975960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937058828.986, "dur": 0.736, + "args": { + "External id": 975961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937058902.769, "dur": 46.162, + "args": { + "External id": 975962,"Sequence number": 10552443, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15006 + } + }, + { + "ph": "s", "id": 23, "pid": 2338708, "tid": 2338708, "ts": 6345937058902.769, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937058955.176, "dur": 6.815, + "args": { + "External id": 975963,"Sequence number": 10552444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058959.355, "dur": 0.978, + "args": { + "External id": 975964,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345937058965.682, "dur": 10.562, + "args": { + "External id": 975965,"Sequence number": 10552444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058974.435, "dur": 0.687, + "args": { + "External id": 975966,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937058977.771, "dur": 2.915, + "args": { + "External id": 975967,"Sequence number": 10552444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058979.298, "dur": 0.755, + "args": { + "External id": 975968,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937058985.237, "dur": 7.446, + "args": { + "External id": 975969,"Sequence number": 10552444, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15013 + } + }, + { + "ph": "s", "id": 22, "pid": 2338708, "tid": 2338708, "ts": 6345937058985.237, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937058989.764, "dur": 1.160, + "args": { + "External id": 975970,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937058996.321, "dur": 5.514, + "args": { + "External id": 975971,"Sequence number": 10552445, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15015 + } + }, + { + "ph": "s", "id": 21, "pid": 2338708, "tid": 2338708, "ts": 6345937058996.321, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937059000.477, "dur": 0.532, + "args": { + "External id": 975972,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345937059003.191, "dur": 26.429, + "args": { + "External id": 975973,"Sequence number": 10552446, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15017 + } + }, + { + "ph": "s", "id": 20, "pid": 2338708, "tid": 2338708, "ts": 6345937059003.191, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937059026.634, "dur": 0.891, + "args": { + "External id": 975974,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937059031.535, "dur": 8.156, + "args": { + "External id": 975975,"Sequence number": 10552447, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15019 + } + }, + { + "ph": "s", "id": 19, "pid": 2338708, "tid": 2338708, "ts": 6345937059031.535, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937059035.665, "dur": 3.237, + "args": { + "External id": 975976,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345937059047.175, "dur": 82.734, + "args": { + "External id": 975977,"Sequence number": 10552448, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345937059086.465, "dur": 43.121, + "args": { + "External id": 975978,"Sequence number": 10552448, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937059091.289, "dur": 12.432, + "args": { + "External id": 975979,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937059097.308, "dur": 5.508, + "args": { + "External id": 975980,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937059104.731, "dur": 24.311, + "args": { + "External id": 975981,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937059182.581, "dur": 5.471, + "args": { + "External id": 975982,"Sequence number": 10552448, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15026 + } + }, + { + "ph": "s", "id": 18, "pid": 2338708, "tid": 2338708, "ts": 6345937059182.581, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937059190.516, "dur": 0.982, + "args": { + "External id": 975983,"Sequence number": 10552449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345937059249.721, "dur": 42889.852, + "args": { + "External id": 975984,"Sequence number": 10552449, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15028 + } + }, + { + "ph": "s", "id": 17, "pid": 2338708, "tid": 2338708, "ts": 6345937059249.721, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345937059272.125, "dur": 31.048, + "args": { + "External id": 975985,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345937059273.203, "dur": 29.786, + "args": { + "External id": 975986,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937059275.126, "dur": 6.712, + "args": { + "External id": 975987,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937059276.495, "dur": 4.982, + "args": { + "External id": 975988,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937059282.594, "dur": 19.972, + "args": { + "External id": 975989,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937059323.383, "dur": 34.210, + "args": { + "External id": 975990,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937059324.615, "dur": 6.814, + "args": { + "External id": 975991,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937059326.952, "dur": 4.174, + "args": { + "External id": 975992,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345937059333.610, "dur": 23.756, + "args": { + "External id": 975993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937059336.496, "dur": 20.414, + "args": { + "External id": 975994,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937059361.305, "dur": 28.111, + "args": { + "External id": 975995,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937059362.405, "dur": 8.219, + "args": { + "External id": 975996,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937059366.925, "dur": 3.479, + "args": { + "External id": 975997,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345937059371.422, "dur": 17.804, + "args": { + "External id": 975998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937059371.925, "dur": 16.990, + "args": { + "External id": 975999,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345937059399.613, "dur": 17.168, + "args": { + "External id": 976000,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937059401.598, "dur": 3.288, + "args": { + "External id": 976001,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345937059405.644, "dur": 10.872, + "args": { + "External id": 976002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937059406.656, "dur": 9.581, + "args": { + "External id": 976003,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2338708, + "ts": 6345937059428.186, "dur": 28.795, + "args": { + "External id": 976004,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937059460.226, "dur": 59.829, + "args": { + "External id": 976005,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937059462.861, "dur": 56.726, + "args": { + "External id": 976006,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937059468.184, "dur": 3.542, + "args": { + "External id": 976007,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345937059474.245, "dur": 26.771, + "args": { + "External id": 976008,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345937059476.114, "dur": 24.680, + "args": { + "External id": 976009,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937059479.012, "dur": 4.434, + "args": { + "External id": 976010,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937059484.498, "dur": 15.937, + "args": { + "External id": 976011,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2338708, + "ts": 6345937059525.700, "dur": 36403.237, + "args": { + "External id": 976012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2338708, + "ts": 6345937059528.029, "dur": 36398.675, + "args": { + "External id": 976013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937095946.542, "dur": 12.880, + "args": { + "External id": 976014,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937095955.700, "dur": 1.282, + "args": { + "External id": 976015,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937095965.444, "dur": 157.813, + "args": { + "External id": 976016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937095967.479, "dur": 7.008, + "args": { + "External id": 976017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937095970.192, "dur": 3.134, + "args": { + "External id": 976018,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937095972.243, "dur": 0.807, + "args": { + "External id": 976019,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937095976.410, "dur": 145.826, + "args": { + "External id": 976020,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937095980.847, "dur": 139.978, + "args": { + "External id": 976021,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937096130.386, "dur": 6.142, + "args": { + "External id": 976022,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937096133.784, "dur": 1.104, + "args": { + "External id": 976023,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937096147.044, "dur": 3.082, + "args": { + "External id": 976024,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937096161.805, "dur": 9.566, + "args": { + "External id": 976025,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937096164.523, "dur": 6.558, + "args": { + "External id": 976026,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937096330.791, "dur": 232.722, + "args": { + "External id": 976027,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937096336.716, "dur": 5.322, + "args": { + "External id": 976028,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937096344.725, "dur": 217.912, + "args": { + "External id": 976029,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937096346.918, "dur": 0.694, + "args": { + "External id": 976030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937096349.083, "dur": 30.517, + "args": { + "External id": 976031,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937096382.021, "dur": 5.977, + "args": { + "External id": 976032,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937096386.860, "dur": 0.648, + "args": { + "External id": 976033,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937096389.309, "dur": 28.470, + "args": { + "External id": 976034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937096391.086, "dur": 1.516, + "args": { + "External id": 976035,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937096396.940, "dur": 20.477, + "args": { + "External id": 976036,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937096400.772, "dur": 3.702, + "args": { + "External id": 976037,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937096419.765, "dur": 24.383, + "args": { + "External id": 976038,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937096446.139, "dur": 17.187, + "args": { + "External id": 976039,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937096466.610, "dur": 16.568, + "args": { + "External id": 976040,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937096484.709, "dur": 14.547, + "args": { + "External id": 976041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937096501.361, "dur": 30.352, + "args": { + "External id": 976042,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937096506.134, "dur": 2.236, + "args": { + "External id": 976043,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937096514.833, "dur": 0.704, + "args": { + "External id": 976044,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937096533.267, "dur": 14.292, + "args": { + "External id": 976045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937096549.114, "dur": 12.317, + "args": { + "External id": 976046,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937096571.341, "dur": 2.223, + "args": { + "External id": 976047,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937096581.624, "dur": 4.681, + "args": { + "External id": 976048,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937096584.779, "dur": 0.469, + "args": { + "External id": 976049,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937096666.787, "dur": 74.786, + "args": { + "External id": 976050,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937096747.661, "dur": 10.510, + "args": { + "External id": 976051,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937096753.109, "dur": 0.724, + "args": { + "External id": 976052,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937096760.352, "dur": 28.432, + "args": { + "External id": 976053,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937096795.234, "dur": 11.355, + "args": { + "External id": 976054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937096797.263, "dur": 8.270, + "args": { + "External id": 976055,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937096800.228, "dur": 5.054, + "args": { + "External id": 976056,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937096810.254, "dur": 52.690, + "args": { + "External id": 976057,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937096815.050, "dur": 47.202, + "args": { + "External id": 976058,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937096890.010, "dur": 17.668, + "args": { + "External id": 976059,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937096914.972, "dur": 4.034, + "args": { + "External id": 976060,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937096917.280, "dur": 0.688, + "args": { + "External id": 976061,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937096924.010, "dur": 49.783, + "args": { + "External id": 976062,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937096925.159, "dur": 7.058, + "args": { + "External id": 976063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937096928.678, "dur": 2.932, + "args": { + "External id": 976064,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937096930.733, "dur": 0.715, + "args": { + "External id": 976065,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937096933.107, "dur": 40.262, + "args": { + "External id": 976066,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937096933.768, "dur": 39.004, + "args": { + "External id": 976067,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937096978.774, "dur": 4.068, + "args": { + "External id": 976068,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937096981.074, "dur": 0.543, + "args": { + "External id": 976069,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937096989.159, "dur": 1.475, + "args": { + "External id": 976070,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937097002.349, "dur": 28.752, + "args": { + "External id": 976071,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937097004.934, "dur": 25.404, + "args": { + "External id": 976072,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937097179.110, "dur": 217.714, + "args": { + "External id": 976073,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937097181.928, "dur": 3.658, + "args": { + "External id": 976074,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937097187.291, "dur": 208.913, + "args": { + "External id": 976075,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937097189.232, "dur": 0.518, + "args": { + "External id": 976076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937097194.573, "dur": 27.908, + "args": { + "External id": 976077,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937097224.143, "dur": 3.986, + "args": { + "External id": 976078,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937097226.988, "dur": 0.862, + "args": { + "External id": 976079,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937097229.395, "dur": 29.825, + "args": { + "External id": 976080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937097230.377, "dur": 5.465, + "args": { + "External id": 976081,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937097237.236, "dur": 21.691, + "args": { + "External id": 976082,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937097243.810, "dur": 2.944, + "args": { + "External id": 976083,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937097260.785, "dur": 25.475, + "args": { + "External id": 976084,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937097288.051, "dur": 14.511, + "args": { + "External id": 976085,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937097305.583, "dur": 17.454, + "args": { + "External id": 976086,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937097324.650, "dur": 13.939, + "args": { + "External id": 976087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937097344.229, "dur": 21.435, + "args": { + "External id": 976088,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937097346.006, "dur": 1.930, + "args": { + "External id": 976089,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937097350.428, "dur": 0.795, + "args": { + "External id": 976090,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937097367.181, "dur": 13.543, + "args": { + "External id": 976091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937097381.995, "dur": 12.902, + "args": { + "External id": 976092,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937097404.852, "dur": 2.414, + "args": { + "External id": 976093,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937097418.239, "dur": 4.180, + "args": { + "External id": 976094,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937097421.109, "dur": 0.433, + "args": { + "External id": 976095,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937097503.049, "dur": 63.980, + "args": { + "External id": 976096,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937097572.982, "dur": 7.820, + "args": { + "External id": 976097,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937097575.369, "dur": 4.095, + "args": { + "External id": 976098,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937097582.489, "dur": 25.897, + "args": { + "External id": 976099,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937097613.433, "dur": 10.064, + "args": { + "External id": 976100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937097615.395, "dur": 7.341, + "args": { + "External id": 976101,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937097621.184, "dur": 1.319, + "args": { + "External id": 976102,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937097626.875, "dur": 43.332, + "args": { + "External id": 976103,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937097628.158, "dur": 41.415, + "args": { + "External id": 976104,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937097674.693, "dur": 14.716, + "args": { + "External id": 976105,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937097695.295, "dur": 4.421, + "args": { + "External id": 976106,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937097698.194, "dur": 0.561, + "args": { + "External id": 976107,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937097704.141, "dur": 51.583, + "args": { + "External id": 976108,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937097707.723, "dur": 4.143, + "args": { + "External id": 976109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937097708.731, "dur": 2.529, + "args": { + "External id": 976110,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937097710.277, "dur": 0.841, + "args": { + "External id": 976111,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937097712.697, "dur": 42.716, + "args": { + "External id": 976112,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937097713.182, "dur": 41.508, + "args": { + "External id": 976113,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937097767.244, "dur": 7.201, + "args": { + "External id": 976114,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937097769.664, "dur": 3.483, + "args": { + "External id": 976115,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937097783.690, "dur": 1.320, + "args": { + "External id": 976116,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937097793.075, "dur": 9.375, + "args": { + "External id": 976117,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937097795.372, "dur": 6.781, + "args": { + "External id": 976118,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937097900.650, "dur": 287.325, + "args": { + "External id": 976119,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937097902.856, "dur": 2.448, + "args": { + "External id": 976120,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937097906.824, "dur": 280.338, + "args": { + "External id": 976121,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937097911.890, "dur": 0.319, + "args": { + "External id": 976122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937097913.626, "dur": 25.614, + "args": { + "External id": 976123,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937097940.826, "dur": 3.173, + "args": { + "External id": 976124,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937097943.088, "dur": 0.694, + "args": { + "External id": 976125,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937097945.042, "dur": 27.274, + "args": { + "External id": 976126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937097946.088, "dur": 1.945, + "args": { + "External id": 976127,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937097949.590, "dur": 22.406, + "args": { + "External id": 976128,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937097955.415, "dur": 3.440, + "args": { + "External id": 976129,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937097973.424, "dur": 32.160, + "args": { + "External id": 976130,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098007.144, "dur": 43.196, + "args": { + "External id": 976131,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937098090.360, "dur": 20.825, + "args": { + "External id": 976132,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098112.702, "dur": 15.919, + "args": { + "External id": 976133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937098130.714, "dur": 24.362, + "args": { + "External id": 976134,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098133.015, "dur": 2.582, + "args": { + "External id": 976135,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937098137.764, "dur": 0.864, + "args": { + "External id": 976136,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098156.420, "dur": 14.138, + "args": { + "External id": 976137,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098174.313, "dur": 11.566, + "args": { + "External id": 976138,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937098196.946, "dur": 2.589, + "args": { + "External id": 976139,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937098210.877, "dur": 4.183, + "args": { + "External id": 976140,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937098213.533, "dur": 0.634, + "args": { + "External id": 976141,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937098292.353, "dur": 64.481, + "args": { + "External id": 976142,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937098362.509, "dur": 4.958, + "args": { + "External id": 976143,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937098365.517, "dur": 0.736, + "args": { + "External id": 976144,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098369.215, "dur": 30.183, + "args": { + "External id": 976145,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937098406.938, "dur": 6.160, + "args": { + "External id": 976146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937098408.673, "dur": 3.585, + "args": { + "External id": 976147,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937098410.730, "dur": 1.284, + "args": { + "External id": 976148,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937098415.975, "dur": 43.536, + "args": { + "External id": 976149,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937098417.074, "dur": 41.901, + "args": { + "External id": 976150,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098463.765, "dur": 15.630, + "args": { + "External id": 976151,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937098485.872, "dur": 7.194, + "args": { + "External id": 976152,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937098491.495, "dur": 0.607, + "args": { + "External id": 976153,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937098497.362, "dur": 48.730, + "args": { + "External id": 976154,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937098498.545, "dur": 3.290, + "args": { + "External id": 976155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937098499.275, "dur": 2.012, + "args": { + "External id": 976156,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937098500.560, "dur": 0.599, + "args": { + "External id": 976157,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937098502.792, "dur": 42.953, + "args": { + "External id": 976158,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937098506.451, "dur": 38.852, + "args": { + "External id": 976159,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937098550.473, "dur": 7.811, + "args": { + "External id": 976160,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937098552.832, "dur": 4.208, + "args": { + "External id": 976161,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937098563.890, "dur": 1.577, + "args": { + "External id": 976162,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937098573.688, "dur": 7.227, + "args": { + "External id": 976163,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937098575.735, "dur": 4.901, + "args": { + "External id": 976164,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937098672.359, "dur": 195.203, + "args": { + "External id": 976165,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937098674.279, "dur": 5.771, + "args": { + "External id": 976166,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937098682.105, "dur": 184.895, + "args": { + "External id": 976167,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937098683.576, "dur": 0.451, + "args": { + "External id": 976168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937098685.065, "dur": 25.084, + "args": { + "External id": 976169,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937098712.091, "dur": 3.450, + "args": { + "External id": 976170,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937098714.561, "dur": 0.759, + "args": { + "External id": 976171,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937098716.681, "dur": 27.015, + "args": { + "External id": 976172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937098717.849, "dur": 1.790, + "args": { + "External id": 976173,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937098722.762, "dur": 20.649, + "args": { + "External id": 976174,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098728.178, "dur": 2.568, + "args": { + "External id": 976175,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937098745.505, "dur": 21.661, + "args": { + "External id": 976176,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098768.786, "dur": 14.240, + "args": { + "External id": 976177,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937098785.690, "dur": 15.519, + "args": { + "External id": 976178,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098802.491, "dur": 12.582, + "args": { + "External id": 976179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937098816.745, "dur": 21.612, + "args": { + "External id": 976180,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098818.808, "dur": 2.254, + "args": { + "External id": 976181,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937098823.433, "dur": 0.622, + "args": { + "External id": 976182,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098842.067, "dur": 12.336, + "args": { + "External id": 976183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937098855.688, "dur": 10.299, + "args": { + "External id": 976184,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937098874.595, "dur": 1.544, + "args": { + "External id": 976185,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937098885.283, "dur": 3.762, + "args": { + "External id": 976186,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937098887.690, "dur": 0.610, + "args": { + "External id": 976187,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937098952.261, "dur": 48.810, + "args": { + "External id": 976188,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937099006.214, "dur": 24.041, + "args": { + "External id": 976189,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099026.960, "dur": 1.238, + "args": { + "External id": 976190,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937099035.183, "dur": 60.364, + "args": { + "External id": 976191,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937099103.319, "dur": 6.361, + "args": { + "External id": 976192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937099104.896, "dur": 3.861, + "args": { + "External id": 976193,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099107.340, "dur": 1.194, + "args": { + "External id": 976194,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937099112.598, "dur": 48.878, + "args": { + "External id": 976195,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937099113.749, "dur": 46.988, + "args": { + "External id": 976196,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937099166.071, "dur": 15.744, + "args": { + "External id": 976197,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937099191.759, "dur": 4.219, + "args": { + "External id": 976198,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099194.420, "dur": 0.551, + "args": { + "External id": 976199,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937099200.239, "dur": 49.856, + "args": { + "External id": 976200,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937099201.081, "dur": 6.408, + "args": { + "External id": 976201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937099201.855, "dur": 5.008, + "args": { + "External id": 976202,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099206.154, "dur": 0.581, + "args": { + "External id": 976203,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937099211.032, "dur": 38.734, + "args": { + "External id": 976204,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937099211.847, "dur": 37.433, + "args": { + "External id": 976205,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937099254.654, "dur": 3.902, + "args": { + "External id": 976206,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099257.014, "dur": 0.386, + "args": { + "External id": 976207,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937099265.154, "dur": 1.441, + "args": { + "External id": 976208,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937099275.161, "dur": 13.421, + "args": { + "External id": 976209,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937099280.586, "dur": 7.756, + "args": { + "External id": 976210,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937099380.908, "dur": 197.854, + "args": { + "External id": 976211,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937099382.941, "dur": 2.490, + "args": { + "External id": 976212,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937099386.938, "dur": 191.357, + "args": { + "External id": 976213,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937099391.172, "dur": 0.330, + "args": { + "External id": 976214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937099392.851, "dur": 22.113, + "args": { + "External id": 976215,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937099416.884, "dur": 3.483, + "args": { + "External id": 976216,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099419.189, "dur": 0.865, + "args": { + "External id": 976217,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937099421.411, "dur": 27.824, + "args": { + "External id": 976218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937099429.318, "dur": 1.427, + "args": { + "External id": 976219,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937099432.129, "dur": 16.730, + "args": { + "External id": 976220,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937099434.635, "dur": 3.333, + "args": { + "External id": 976221,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937099450.708, "dur": 20.960, + "args": { + "External id": 976222,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937099473.345, "dur": 13.600, + "args": { + "External id": 976223,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937099489.834, "dur": 13.479, + "args": { + "External id": 976224,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937099504.709, "dur": 18.293, + "args": { + "External id": 976225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937099524.757, "dur": 24.797, + "args": { + "External id": 976226,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937099527.183, "dur": 2.008, + "args": { + "External id": 976227,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099533.670, "dur": 0.566, + "args": { + "External id": 976228,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937099550.924, "dur": 14.576, + "args": { + "External id": 976229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937099566.660, "dur": 10.645, + "args": { + "External id": 976230,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937099585.987, "dur": 1.944, + "args": { + "External id": 976231,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937099596.898, "dur": 3.754, + "args": { + "External id": 976232,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099599.263, "dur": 0.511, + "args": { + "External id": 976233,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937099673.363, "dur": 56.654, + "args": { + "External id": 976234,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937099738.056, "dur": 7.124, + "args": { + "External id": 976235,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099740.783, "dur": 3.055, + "args": { + "External id": 976236,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937099746.956, "dur": 25.399, + "args": { + "External id": 976237,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937099777.078, "dur": 5.087, + "args": { + "External id": 976238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937099778.227, "dur": 3.180, + "args": { + "External id": 976239,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099780.396, "dur": 0.816, + "args": { + "External id": 976240,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937099787.621, "dur": 43.868, + "args": { + "External id": 976241,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937099788.672, "dur": 42.184, + "args": { + "External id": 976242,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937099852.583, "dur": 16.290, + "args": { + "External id": 976243,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937099876.112, "dur": 4.290, + "args": { + "External id": 976244,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099878.880, "dur": 0.558, + "args": { + "External id": 976245,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937099884.427, "dur": 52.038, + "args": { + "External id": 976246,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937099885.063, "dur": 8.819, + "args": { + "External id": 976247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937099888.553, "dur": 4.784, + "args": { + "External id": 976248,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099890.316, "dur": 2.873, + "args": { + "External id": 976249,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937099894.692, "dur": 41.344, + "args": { + "External id": 976250,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937099895.690, "dur": 39.813, + "args": { + "External id": 976251,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937099941.170, "dur": 4.074, + "args": { + "External id": 976252,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937099943.660, "dur": 0.443, + "args": { + "External id": 976253,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937099950.407, "dur": 1.424, + "args": { + "External id": 976254,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937099963.045, "dur": 9.345, + "args": { + "External id": 976255,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937099965.121, "dur": 6.946, + "args": { + "External id": 976256,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937100132.120, "dur": 209.419, + "args": { + "External id": 976257,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937100134.950, "dur": 3.918, + "args": { + "External id": 976258,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937100141.328, "dur": 199.656, + "args": { + "External id": 976259,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937100143.009, "dur": 0.486, + "args": { + "External id": 976260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937100147.578, "dur": 30.882, + "args": { + "External id": 976261,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937100180.624, "dur": 3.960, + "args": { + "External id": 976262,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937100183.039, "dur": 1.081, + "args": { + "External id": 976263,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937100185.852, "dur": 27.242, + "args": { + "External id": 976264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937100187.031, "dur": 1.794, + "args": { + "External id": 976265,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937100190.250, "dur": 22.357, + "args": { + "External id": 976266,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100196.390, "dur": 2.605, + "args": { + "External id": 976267,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937100214.527, "dur": 27.223, + "args": { + "External id": 976268,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100243.662, "dur": 14.492, + "args": { + "External id": 976269,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937100260.757, "dur": 15.434, + "args": { + "External id": 976270,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100277.660, "dur": 12.951, + "args": { + "External id": 976271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937100294.994, "dur": 20.233, + "args": { + "External id": 976272,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100297.306, "dur": 2.088, + "args": { + "External id": 976273,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937100301.463, "dur": 0.648, + "args": { + "External id": 976274,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100316.625, "dur": 11.254, + "args": { + "External id": 976275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100329.194, "dur": 10.292, + "args": { + "External id": 976276,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937100349.247, "dur": 2.262, + "args": { + "External id": 976277,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937100361.639, "dur": 4.115, + "args": { + "External id": 976278,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937100364.424, "dur": 0.500, + "args": { + "External id": 976279,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937100438.835, "dur": 61.773, + "args": { + "External id": 976280,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937100505.707, "dur": 4.905, + "args": { + "External id": 976281,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937100508.505, "dur": 1.064, + "args": { + "External id": 976282,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100511.907, "dur": 22.963, + "args": { + "External id": 976283,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937100539.700, "dur": 7.631, + "args": { + "External id": 976284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937100541.314, "dur": 5.407, + "args": { + "External id": 976285,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937100545.598, "dur": 0.927, + "args": { + "External id": 976286,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937100550.181, "dur": 42.197, + "args": { + "External id": 976287,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937100551.174, "dur": 40.584, + "args": { + "External id": 976288,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100596.547, "dur": 15.832, + "args": { + "External id": 976289,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937100618.352, "dur": 3.996, + "args": { + "External id": 976290,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937100620.812, "dur": 0.550, + "args": { + "External id": 976291,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937100626.541, "dur": 50.085, + "args": { + "External id": 976292,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937100630.138, "dur": 5.787, + "args": { + "External id": 976293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937100630.937, "dur": 4.448, + "args": { + "External id": 976294,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937100632.151, "dur": 3.017, + "args": { + "External id": 976295,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937100636.712, "dur": 39.559, + "args": { + "External id": 976296,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937100637.399, "dur": 38.273, + "args": { + "External id": 976297,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937100681.672, "dur": 6.795, + "args": { + "External id": 976298,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937100684.063, "dur": 3.244, + "args": { + "External id": 976299,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937100697.094, "dur": 1.418, + "args": { + "External id": 976300,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937100706.716, "dur": 12.339, + "args": { + "External id": 976301,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937100715.043, "dur": 3.734, + "args": { + "External id": 976302,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937100805.606, "dur": 187.443, + "args": { + "External id": 976303,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937100809.701, "dur": 2.008, + "args": { + "External id": 976304,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937100813.506, "dur": 179.040, + "args": { + "External id": 976305,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937100818.391, "dur": 0.388, + "args": { + "External id": 976306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937100819.816, "dur": 21.334, + "args": { + "External id": 976307,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937100842.634, "dur": 5.352, + "args": { + "External id": 976308,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937100846.797, "dur": 0.967, + "args": { + "External id": 976309,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937100849.026, "dur": 22.127, + "args": { + "External id": 976310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937100850.303, "dur": 1.538, + "args": { + "External id": 976311,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937100852.928, "dur": 17.874, + "args": { + "External id": 976312,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100855.099, "dur": 2.609, + "args": { + "External id": 976313,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937100872.330, "dur": 22.580, + "args": { + "External id": 976314,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100896.194, "dur": 14.925, + "args": { + "External id": 976315,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937100916.356, "dur": 13.645, + "args": { + "External id": 976316,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100930.969, "dur": 11.558, + "args": { + "External id": 976317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937100944.530, "dur": 21.203, + "args": { + "External id": 976318,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100948.826, "dur": 1.944, + "args": { + "External id": 976319,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937100952.806, "dur": 0.513, + "args": { + "External id": 976320,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100967.092, "dur": 10.981, + "args": { + "External id": 976321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937100981.907, "dur": 9.455, + "args": { + "External id": 976322,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937100999.461, "dur": 1.339, + "args": { + "External id": 976323,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937101028.556, "dur": 5.150, + "args": { + "External id": 976324,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937101031.640, "dur": 0.678, + "args": { + "External id": 976325,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937101142.450, "dur": 60.672, + "args": { + "External id": 976326,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937101208.957, "dur": 5.960, + "args": { + "External id": 976327,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937101212.242, "dur": 1.088, + "args": { + "External id": 976328,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937101216.682, "dur": 25.488, + "args": { + "External id": 976329,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937101250.421, "dur": 6.222, + "args": { + "External id": 976330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937101252.179, "dur": 3.820, + "args": { + "External id": 976331,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937101254.563, "dur": 1.200, + "args": { + "External id": 976332,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937101259.731, "dur": 42.688, + "args": { + "External id": 976333,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937101260.981, "dur": 40.827, + "args": { + "External id": 976334,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937101306.974, "dur": 15.274, + "args": { + "External id": 976335,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937101328.621, "dur": 6.176, + "args": { + "External id": 976336,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937101333.470, "dur": 0.542, + "args": { + "External id": 976337,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937101338.975, "dur": 52.866, + "args": { + "External id": 976338,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937101345.870, "dur": 3.630, + "args": { + "External id": 976339,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937101346.651, "dur": 2.232, + "args": { + "External id": 976340,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937101348.065, "dur": 0.691, + "args": { + "External id": 976341,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937101350.195, "dur": 41.081, + "args": { + "External id": 976342,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937101353.492, "dur": 37.329, + "args": { + "External id": 976343,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937101396.166, "dur": 3.738, + "args": { + "External id": 976344,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937101398.280, "dur": 0.588, + "args": { + "External id": 976345,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937101405.786, "dur": 1.754, + "args": { + "External id": 976346,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937101416.259, "dur": 9.553, + "args": { + "External id": 976347,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937101418.423, "dur": 7.111, + "args": { + "External id": 976348,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937101511.521, "dur": 187.514, + "args": { + "External id": 976349,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937101514.075, "dur": 5.898, + "args": { + "External id": 976350,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937101521.476, "dur": 176.990, + "args": { + "External id": 976351,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937101523.128, "dur": 0.446, + "args": { + "External id": 976352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937101524.840, "dur": 29.281, + "args": { + "External id": 976353,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937101555.670, "dur": 2.950, + "args": { + "External id": 976354,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937101557.546, "dur": 0.790, + "args": { + "External id": 976355,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937101559.654, "dur": 26.210, + "args": { + "External id": 976356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937101560.799, "dur": 1.676, + "args": { + "External id": 976357,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937101566.168, "dur": 19.352, + "args": { + "External id": 976358,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937101571.955, "dur": 2.472, + "args": { + "External id": 976359,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937101587.075, "dur": 20.062, + "args": { + "External id": 976360,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937101608.542, "dur": 12.924, + "args": { + "External id": 976361,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937101623.596, "dur": 12.886, + "args": { + "External id": 976362,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937101637.737, "dur": 11.759, + "args": { + "External id": 976363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937101651.390, "dur": 18.620, + "args": { + "External id": 976364,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937101653.095, "dur": 1.802, + "args": { + "External id": 976365,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937101656.788, "dur": 0.618, + "args": { + "External id": 976366,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937101674.652, "dur": 11.684, + "args": { + "External id": 976367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937101687.575, "dur": 9.794, + "args": { + "External id": 976368,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937101705.889, "dur": 1.600, + "args": { + "External id": 976369,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937101715.800, "dur": 3.385, + "args": { + "External id": 976370,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937101718.036, "dur": 0.414, + "args": { + "External id": 976371,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937101778.484, "dur": 48.235, + "args": { + "External id": 976372,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937101832.025, "dur": 4.151, + "args": { + "External id": 976373,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937101834.350, "dur": 0.779, + "args": { + "External id": 976374,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937101840.066, "dur": 23.230, + "args": { + "External id": 976375,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937101867.428, "dur": 5.144, + "args": { + "External id": 976376,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937101869.006, "dur": 2.882, + "args": { + "External id": 976377,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937101870.764, "dur": 0.985, + "args": { + "External id": 976378,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937101874.810, "dur": 40.149, + "args": { + "External id": 976379,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937101875.918, "dur": 38.281, + "args": { + "External id": 976380,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937101918.786, "dur": 12.870, + "args": { + "External id": 976381,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937101939.269, "dur": 25.013, + "args": { + "External id": 976382,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937101941.578, "dur": 22.310, + "args": { + "External id": 976383,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937101948.932, "dur": 0.784, + "args": { + "External id": 976384,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345937102000.026, "dur": 49.513, + "args": { + "External id": 976385,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345937102001.827, "dur": 47.442, + "args": { + "External id": 976386,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 15430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937102006.363, "dur": 23.638, + "args": { + "External id": 976387,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937102032.106, "dur": 16.522, + "args": { + "External id": 976388,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937102099.839, "dur": 8.983, + "args": { + "External id": 976389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937102105.055, "dur": 3.332, + "args": { + "External id": 976390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937102109.956, "dur": 1.083, + "args": { + "External id": 976391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937102110.292, "dur": 0.479, + "args": { + "External id": 976392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937102160.800, "dur": 29.083, + "args": { + "External id": 976393,"Sequence number": 10552450, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937102191.961, "dur": 12.569, + "args": { + "External id": 976394,"Sequence number": 10552451, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15438 + } + }, + { + "ph": "s", "id": 16, "pid": 2338708, "tid": 2338708, "ts": 6345937102191.961, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937102211.294, "dur": 7.292, + "args": { + "External id": 976395,"Sequence number": 10552452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937102215.323, "dur": 1.487, + "args": { + "External id": 976396,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345937102221.137, "dur": 9.345, + "args": { + "External id": 976397,"Sequence number": 10552452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937102228.719, "dur": 0.373, + "args": { + "External id": 976398,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937102231.722, "dur": 2.266, + "args": { + "External id": 976399,"Sequence number": 10552452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937102232.997, "dur": 0.401, + "args": { + "External id": 976400,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937102237.977, "dur": 5.093, + "args": { + "External id": 976401,"Sequence number": 10552452, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15445 + } + }, + { + "ph": "s", "id": 15, "pid": 2338708, "tid": 2338708, "ts": 6345937102237.977, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937102241.019, "dur": 0.754, + "args": { + "External id": 976402,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937102246.498, "dur": 7.916, + "args": { + "External id": 976403,"Sequence number": 10552453, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15447 + } + }, + { + "ph": "s", "id": 14, "pid": 2338708, "tid": 2338708, "ts": 6345937102246.498, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937102253.396, "dur": 0.266, + "args": { + "External id": 976404,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345937102255.266, "dur": 5.018, + "args": { + "External id": 976405,"Sequence number": 10552454, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15449 + } + }, + { + "ph": "s", "id": 13, "pid": 2338708, "tid": 2338708, "ts": 6345937102255.266, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937102258.629, "dur": 0.848, + "args": { + "External id": 976406,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937102261.224, "dur": 7.072, + "args": { + "External id": 976407,"Sequence number": 10552455, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15451 + } + }, + { + "ph": "s", "id": 12, "pid": 2338708, "tid": 2338708, "ts": 6345937102261.224, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937102263.911, "dur": 3.690, + "args": { + "External id": 976408,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345937102272.057, "dur": 36.407, + "args": { + "External id": 976409,"Sequence number": 10552456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345937102276.219, "dur": 31.996, + "args": { + "External id": 976410,"Sequence number": 10552456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937102278.855, "dur": 7.384, + "args": { + "External id": 976411,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937102281.306, "dur": 4.389, + "args": { + "External id": 976412,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937102287.092, "dur": 20.681, + "args": { + "External id": 976413,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937102338.917, "dur": 4.320, + "args": { + "External id": 976414,"Sequence number": 10552456, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15458 + } + }, + { + "ph": "s", "id": 11, "pid": 2338708, "tid": 2338708, "ts": 6345937102338.917, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937102345.396, "dur": 1.237, + "args": { + "External id": 976415,"Sequence number": 10552457, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345937102387.006, "dur": 43459.633, + "args": { + "External id": 976416,"Sequence number": 10552457, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15460 + } + }, + { + "ph": "s", "id": 10, "pid": 2338708, "tid": 2338708, "ts": 6345937102387.006, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345937102405.589, "dur": 24.555, + "args": { + "External id": 976417,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345937102406.382, "dur": 23.545, + "args": { + "External id": 976418,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937102407.637, "dur": 5.410, + "args": { + "External id": 976419,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937102408.998, "dur": 3.608, + "args": { + "External id": 976420,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937102413.817, "dur": 15.701, + "args": { + "External id": 976421,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937102447.450, "dur": 27.339, + "args": { + "External id": 976422,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937102448.788, "dur": 6.143, + "args": { + "External id": 976423,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937102450.443, "dur": 4.199, + "args": { + "External id": 976424,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345937102456.382, "dur": 18.198, + "args": { + "External id": 976425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937102460.471, "dur": 13.673, + "args": { + "External id": 976426,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937102478.591, "dur": 17.185, + "args": { + "External id": 976427,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937102479.178, "dur": 3.877, + "args": { + "External id": 976428,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937102480.191, "dur": 2.647, + "args": { + "External id": 976429,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345937102483.614, "dur": 11.969, + "args": { + "External id": 976430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937102484.040, "dur": 11.239, + "args": { + "External id": 976431,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345937102501.441, "dur": 25.283, + "args": { + "External id": 976432,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937102503.084, "dur": 3.120, + "args": { + "External id": 976433,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345937102506.691, "dur": 19.733, + "args": { + "External id": 976434,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937102516.886, "dur": 9.185, + "args": { + "External id": 976435,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2338708, + "ts": 6345937102531.479, "dur": 26.854, + "args": { + "External id": 976436,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937102561.358, "dur": 47.718, + "args": { + "External id": 976437,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937102563.480, "dur": 45.172, + "args": { + "External id": 976438,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937102568.426, "dur": 0.893, + "args": { + "External id": 976439,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345937102570.542, "dur": 22.839, + "args": { + "External id": 976440,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345937102571.985, "dur": 21.072, + "args": { + "External id": 976441,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937102576.491, "dur": 3.207, + "args": { + "External id": 976442,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937102580.558, "dur": 12.180, + "args": { + "External id": 976443,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2338708, + "ts": 6345937102615.547, "dur": 37390.297, + "args": { + "External id": 976444,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2338708, + "ts": 6345937102617.134, "dur": 37387.690, + "args": { + "External id": 976445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937140030.334, "dur": 6.916, + "args": { + "External id": 976446,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937140034.205, "dur": 1.091, + "args": { + "External id": 976447,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937140042.739, "dur": 132.401, + "args": { + "External id": 976448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937140044.466, "dur": 5.631, + "args": { + "External id": 976449,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937140046.733, "dur": 2.343, + "args": { + "External id": 976450,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937140048.283, "dur": 0.511, + "args": { + "External id": 976451,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937140076.114, "dur": 98.063, + "args": { + "External id": 976452,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937140078.266, "dur": 95.094, + "args": { + "External id": 976453,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937140179.825, "dur": 4.874, + "args": { + "External id": 976454,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937140182.401, "dur": 0.713, + "args": { + "External id": 976455,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937140193.126, "dur": 2.426, + "args": { + "External id": 976456,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937140205.194, "dur": 10.272, + "args": { + "External id": 976457,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937140210.352, "dur": 4.816, + "args": { + "External id": 976458,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937140357.459, "dur": 205.402, + "args": { + "External id": 976459,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937140360.466, "dur": 2.249, + "args": { + "External id": 976460,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937140364.290, "dur": 197.722, + "args": { + "External id": 976461,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937140366.015, "dur": 0.468, + "args": { + "External id": 976462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937140372.413, "dur": 28.876, + "args": { + "External id": 976463,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937140403.489, "dur": 3.393, + "args": { + "External id": 976464,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937140405.928, "dur": 0.640, + "args": { + "External id": 976465,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937140408.118, "dur": 29.172, + "args": { + "External id": 976466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937140411.361, "dur": 4.212, + "args": { + "External id": 976467,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937140416.827, "dur": 20.092, + "args": { + "External id": 976468,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937140420.717, "dur": 3.820, + "args": { + "External id": 976469,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937140438.981, "dur": 22.505, + "args": { + "External id": 976470,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937140463.203, "dur": 14.109, + "args": { + "External id": 976471,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937140480.298, "dur": 14.837, + "args": { + "External id": 976472,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937140496.846, "dur": 13.453, + "args": { + "External id": 976473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937140512.538, "dur": 22.970, + "args": { + "External id": 976474,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937140514.354, "dur": 2.018, + "args": { + "External id": 976475,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937140521.504, "dur": 0.564, + "args": { + "External id": 976476,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937140537.017, "dur": 11.937, + "args": { + "External id": 976477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937140550.389, "dur": 10.546, + "args": { + "External id": 976478,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937140570.130, "dur": 2.181, + "args": { + "External id": 976479,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937140579.543, "dur": 3.731, + "args": { + "External id": 976480,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937140581.831, "dur": 0.453, + "args": { + "External id": 976481,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937140660.079, "dur": 68.042, + "args": { + "External id": 976482,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937140736.406, "dur": 7.806, + "args": { + "External id": 976483,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937140738.893, "dur": 0.800, + "args": { + "External id": 976484,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937140745.755, "dur": 34.944, + "args": { + "External id": 976485,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937140786.315, "dur": 8.756, + "args": { + "External id": 976486,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937140787.968, "dur": 6.175, + "args": { + "External id": 976487,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937140790.178, "dur": 3.687, + "args": { + "External id": 976488,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937140801.012, "dur": 46.486, + "args": { + "External id": 976489,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937140801.967, "dur": 44.848, + "args": { + "External id": 976490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937140851.857, "dur": 14.782, + "args": { + "External id": 976491,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937140872.926, "dur": 3.509, + "args": { + "External id": 976492,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937140874.917, "dur": 0.576, + "args": { + "External id": 976493,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937140880.730, "dur": 46.942, + "args": { + "External id": 976494,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937140881.812, "dur": 5.817, + "args": { + "External id": 976495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937140885.258, "dur": 1.784, + "args": { + "External id": 976496,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937140886.408, "dur": 0.465, + "args": { + "External id": 976497,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937140888.188, "dur": 38.974, + "args": { + "External id": 976498,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937140888.888, "dur": 37.688, + "args": { + "External id": 976499,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937140931.758, "dur": 3.374, + "args": { + "External id": 976500,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937140933.397, "dur": 0.481, + "args": { + "External id": 976501,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937140943.666, "dur": 1.351, + "args": { + "External id": 976502,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937140953.030, "dur": 6.405, + "args": { + "External id": 976503,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937140954.631, "dur": 4.498, + "args": { + "External id": 976504,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937141112.337, "dur": 203.397, + "args": { + "External id": 976505,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937141114.988, "dur": 3.052, + "args": { + "External id": 976506,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937141122.289, "dur": 192.932, + "args": { + "External id": 976507,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937141126.681, "dur": 0.385, + "args": { + "External id": 976508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937141128.144, "dur": 23.480, + "args": { + "External id": 976509,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937141153.468, "dur": 6.575, + "args": { + "External id": 976510,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937141155.748, "dur": 3.928, + "args": { + "External id": 976511,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937141161.232, "dur": 23.658, + "args": { + "External id": 976512,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937141162.430, "dur": 1.269, + "args": { + "External id": 976513,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937141165.087, "dur": 19.549, + "args": { + "External id": 976514,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141167.987, "dur": 2.606, + "args": { + "External id": 976515,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937141186.339, "dur": 21.294, + "args": { + "External id": 976516,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141209.143, "dur": 15.832, + "args": { + "External id": 976517,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937141230.551, "dur": 14.533, + "args": { + "External id": 976518,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141246.646, "dur": 13.844, + "args": { + "External id": 976519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937141262.432, "dur": 24.282, + "args": { + "External id": 976520,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141264.302, "dur": 2.071, + "args": { + "External id": 976521,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937141268.263, "dur": 3.239, + "args": { + "External id": 976522,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141288.208, "dur": 13.142, + "args": { + "External id": 976523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141302.515, "dur": 11.623, + "args": { + "External id": 976524,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937141326.905, "dur": 2.322, + "args": { + "External id": 976525,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937141340.496, "dur": 4.401, + "args": { + "External id": 976526,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937141343.392, "dur": 0.523, + "args": { + "External id": 976527,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937141422.557, "dur": 63.338, + "args": { + "External id": 976528,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937141491.124, "dur": 4.170, + "args": { + "External id": 976529,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937141493.623, "dur": 0.538, + "args": { + "External id": 976530,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141496.779, "dur": 26.562, + "args": { + "External id": 976531,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937141528.459, "dur": 8.236, + "args": { + "External id": 976532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937141530.129, "dur": 5.842, + "args": { + "External id": 976533,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937141534.655, "dur": 1.117, + "args": { + "External id": 976534,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937141539.802, "dur": 44.092, + "args": { + "External id": 976535,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937141540.908, "dur": 42.296, + "args": { + "External id": 976536,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141587.964, "dur": 16.500, + "args": { + "External id": 976537,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937141623.949, "dur": 3.694, + "args": { + "External id": 976538,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937141626.025, "dur": 0.630, + "args": { + "External id": 976539,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937141634.617, "dur": 45.174, + "args": { + "External id": 976540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937141635.480, "dur": 3.407, + "args": { + "External id": 976541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937141636.249, "dur": 2.008, + "args": { + "External id": 976542,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937141637.498, "dur": 0.619, + "args": { + "External id": 976543,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937141639.556, "dur": 39.802, + "args": { + "External id": 976544,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937141640.031, "dur": 38.820, + "args": { + "External id": 976545,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937141684.462, "dur": 6.189, + "args": { + "External id": 976546,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937141686.198, "dur": 3.156, + "args": { + "External id": 976547,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937141699.009, "dur": 1.417, + "args": { + "External id": 976548,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937141708.612, "dur": 9.334, + "args": { + "External id": 976549,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937141710.829, "dur": 6.813, + "args": { + "External id": 976550,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937141806.902, "dur": 185.261, + "args": { + "External id": 976551,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937141808.788, "dur": 2.083, + "args": { + "External id": 976552,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937141812.324, "dur": 179.140, + "args": { + "External id": 976553,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937141816.407, "dur": 0.331, + "args": { + "External id": 976554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937141817.854, "dur": 22.370, + "args": { + "External id": 976555,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937141841.711, "dur": 2.931, + "args": { + "External id": 976556,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937141843.796, "dur": 0.682, + "args": { + "External id": 976557,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937141845.703, "dur": 25.755, + "args": { + "External id": 976558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937141846.732, "dur": 1.653, + "args": { + "External id": 976559,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937141849.725, "dur": 21.450, + "args": { + "External id": 976560,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141855.160, "dur": 2.895, + "args": { + "External id": 976561,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937141872.718, "dur": 21.987, + "args": { + "External id": 976562,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141896.103, "dur": 11.704, + "args": { + "External id": 976563,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937141913.984, "dur": 14.166, + "args": { + "External id": 976564,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141929.468, "dur": 11.147, + "args": { + "External id": 976565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937141942.202, "dur": 19.386, + "args": { + "External id": 976566,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141944.202, "dur": 1.872, + "args": { + "External id": 976567,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937141948.707, "dur": 0.517, + "args": { + "External id": 976568,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141962.735, "dur": 11.129, + "args": { + "External id": 976569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937141977.683, "dur": 12.831, + "args": { + "External id": 976570,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937141998.648, "dur": 1.821, + "args": { + "External id": 976571,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937142027.763, "dur": 4.855, + "args": { + "External id": 976572,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142030.343, "dur": 0.751, + "args": { + "External id": 976573,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937142149.782, "dur": 64.072, + "args": { + "External id": 976574,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937142219.461, "dur": 6.054, + "args": { + "External id": 976575,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142222.755, "dur": 1.180, + "args": { + "External id": 976576,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937142227.107, "dur": 28.379, + "args": { + "External id": 976577,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937142263.258, "dur": 5.296, + "args": { + "External id": 976578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937142264.653, "dur": 3.013, + "args": { + "External id": 976579,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142266.621, "dur": 0.809, + "args": { + "External id": 976580,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937142271.643, "dur": 41.756, + "args": { + "External id": 976581,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937142272.773, "dur": 40.036, + "args": { + "External id": 976582,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937142317.366, "dur": 15.133, + "args": { + "External id": 976583,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937142338.833, "dur": 6.836, + "args": { + "External id": 976584,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142344.126, "dur": 0.538, + "args": { + "External id": 976585,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937142349.921, "dur": 56.330, + "args": { + "External id": 976586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937142350.628, "dur": 6.692, + "args": { + "External id": 976587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937142351.431, "dur": 5.296, + "args": { + "External id": 976588,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142352.909, "dur": 3.597, + "args": { + "External id": 976589,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937142357.912, "dur": 47.899, + "args": { + "External id": 976590,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937142367.150, "dur": 38.044, + "args": { + "External id": 976591,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937142410.534, "dur": 3.584, + "args": { + "External id": 976592,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142412.360, "dur": 0.591, + "args": { + "External id": 976593,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937142421.200, "dur": 1.698, + "args": { + "External id": 976594,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937142430.978, "dur": 9.609, + "args": { + "External id": 976595,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937142435.788, "dur": 4.527, + "args": { + "External id": 976596,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937142528.436, "dur": 180.764, + "args": { + "External id": 976597,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937142533.428, "dur": 4.175, + "args": { + "External id": 976598,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937142538.987, "dur": 169.451, + "args": { + "External id": 976599,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937142540.351, "dur": 0.420, + "args": { + "External id": 976600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937142542.034, "dur": 20.526, + "args": { + "External id": 976601,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937142564.006, "dur": 5.367, + "args": { + "External id": 976602,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142568.333, "dur": 0.723, + "args": { + "External id": 976603,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937142570.433, "dur": 22.840, + "args": { + "External id": 976604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937142574.494, "dur": 1.514, + "args": { + "External id": 976605,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937142577.367, "dur": 15.639, + "args": { + "External id": 976606,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937142579.866, "dur": 2.255, + "args": { + "External id": 976607,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937142594.588, "dur": 19.102, + "args": { + "External id": 976608,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937142614.998, "dur": 13.949, + "args": { + "External id": 976609,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937142631.384, "dur": 12.234, + "args": { + "External id": 976610,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937142645.038, "dur": 11.962, + "args": { + "External id": 976611,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937142658.671, "dur": 24.485, + "args": { + "External id": 976612,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937142663.410, "dur": 1.884, + "args": { + "External id": 976613,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142669.958, "dur": 0.603, + "args": { + "External id": 976614,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937142684.635, "dur": 11.099, + "args": { + "External id": 976615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937142696.861, "dur": 10.546, + "args": { + "External id": 976616,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937142715.994, "dur": 1.511, + "args": { + "External id": 976617,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937142726.107, "dur": 3.154, + "args": { + "External id": 976618,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142728.031, "dur": 0.438, + "args": { + "External id": 976619,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937142792.011, "dur": 48.442, + "args": { + "External id": 976620,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937142844.941, "dur": 7.838, + "args": { + "External id": 976621,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142850.833, "dur": 0.691, + "args": { + "External id": 976622,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937142854.085, "dur": 24.218, + "args": { + "External id": 976623,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937142882.623, "dur": 4.790, + "args": { + "External id": 976624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937142884.028, "dur": 2.717, + "args": { + "External id": 976625,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142885.448, "dur": 1.090, + "args": { + "External id": 976626,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937142889.989, "dur": 40.431, + "args": { + "External id": 976627,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937142890.884, "dur": 38.969, + "args": { + "External id": 976628,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937142934.004, "dur": 15.256, + "args": { + "External id": 976629,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937142957.378, "dur": 3.704, + "args": { + "External id": 976630,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142959.542, "dur": 0.691, + "args": { + "External id": 976631,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937142964.535, "dur": 67.232, + "args": { + "External id": 976632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937142965.246, "dur": 5.409, + "args": { + "External id": 976633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937142965.954, "dur": 4.186, + "args": { + "External id": 976634,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937142969.528, "dur": 0.479, + "args": { + "External id": 976635,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937142971.153, "dur": 59.988, + "args": { + "External id": 976636,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937142971.677, "dur": 58.333, + "args": { + "External id": 976637,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937143038.665, "dur": 4.634, + "args": { + "External id": 976638,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937143041.155, "dur": 0.560, + "args": { + "External id": 976639,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937143049.504, "dur": 1.688, + "args": { + "External id": 976640,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937143093.675, "dur": 10.364, + "args": { + "External id": 976641,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937143098.067, "dur": 5.424, + "args": { + "External id": 976642,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937143197.531, "dur": 204.992, + "args": { + "External id": 976643,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937143202.372, "dur": 2.208, + "args": { + "External id": 976644,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937143206.347, "dur": 195.660, + "args": { + "External id": 976645,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937143207.761, "dur": 0.359, + "args": { + "External id": 976646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937143209.410, "dur": 24.552, + "args": { + "External id": 976647,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937143235.492, "dur": 5.003, + "args": { + "External id": 976648,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937143239.461, "dur": 0.766, + "args": { + "External id": 976649,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937143241.405, "dur": 25.717, + "args": { + "External id": 976650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937143244.989, "dur": 1.387, + "args": { + "External id": 976651,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937143247.522, "dur": 19.323, + "args": { + "External id": 976652,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937143250.373, "dur": 3.104, + "args": { + "External id": 976653,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937143268.501, "dur": 24.968, + "args": { + "External id": 976654,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937143295.071, "dur": 18.409, + "args": { + "External id": 976655,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937143316.384, "dur": 16.480, + "args": { + "External id": 976656,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937143334.203, "dur": 12.986, + "args": { + "External id": 976657,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937143349.019, "dur": 24.501, + "args": { + "External id": 976658,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937143351.244, "dur": 1.985, + "args": { + "External id": 976659,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937143357.556, "dur": 0.659, + "args": { + "External id": 976660,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937143375.036, "dur": 12.567, + "args": { + "External id": 976661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937143388.623, "dur": 12.393, + "args": { + "External id": 976662,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937143409.278, "dur": 1.926, + "args": { + "External id": 976663,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937143420.837, "dur": 3.801, + "args": { + "External id": 976664,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937143423.316, "dur": 0.425, + "args": { + "External id": 976665,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937143490.682, "dur": 72.986, + "args": { + "External id": 976666,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937143571.099, "dur": 4.284, + "args": { + "External id": 976667,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937143573.365, "dur": 0.742, + "args": { + "External id": 976668,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937143576.787, "dur": 32.131, + "args": { + "External id": 976669,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937143613.647, "dur": 6.530, + "args": { + "External id": 976670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937143615.152, "dur": 3.991, + "args": { + "External id": 976671,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937143617.183, "dur": 1.754, + "args": { + "External id": 976672,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937143625.767, "dur": 42.244, + "args": { + "External id": 976673,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937143626.622, "dur": 40.609, + "args": { + "External id": 976674,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937143671.929, "dur": 16.373, + "args": { + "External id": 976675,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937143694.070, "dur": 3.283, + "args": { + "External id": 976676,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937143695.862, "dur": 0.555, + "args": { + "External id": 976677,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937143701.264, "dur": 45.716, + "args": { + "External id": 976678,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937143702.017, "dur": 5.326, + "args": { + "External id": 976679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937143704.981, "dur": 1.843, + "args": { + "External id": 976680,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937143706.238, "dur": 0.435, + "args": { + "External id": 976681,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937143707.967, "dur": 38.623, + "args": { + "External id": 976682,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937143708.489, "dur": 37.478, + "args": { + "External id": 976683,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937143751.352, "dur": 3.621, + "args": { + "External id": 976684,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937143753.163, "dur": 0.579, + "args": { + "External id": 976685,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937143763.858, "dur": 1.753, + "args": { + "External id": 976686,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937143773.600, "dur": 12.156, + "args": { + "External id": 976687,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937143775.235, "dur": 10.185, + "args": { + "External id": 976688,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937143872.567, "dur": 249.051, + "args": { + "External id": 976689,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937143874.538, "dur": 2.031, + "args": { + "External id": 976690,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937143878.153, "dur": 242.839, + "args": { + "External id": 976691,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937143882.531, "dur": 0.328, + "args": { + "External id": 976692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937143884.013, "dur": 23.890, + "args": { + "External id": 976693,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937143909.331, "dur": 3.081, + "args": { + "External id": 976694,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937143911.332, "dur": 0.832, + "args": { + "External id": 976695,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937143913.538, "dur": 23.093, + "args": { + "External id": 976696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937143914.582, "dur": 1.207, + "args": { + "External id": 976697,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937143916.943, "dur": 19.290, + "args": { + "External id": 976698,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937143921.553, "dur": 2.192, + "args": { + "External id": 976699,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937143937.664, "dur": 19.715, + "args": { + "External id": 976700,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937143958.576, "dur": 14.108, + "args": { + "External id": 976701,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937143977.252, "dur": 13.657, + "args": { + "External id": 976702,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937143992.118, "dur": 14.191, + "args": { + "External id": 976703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937144026.604, "dur": 60.254, + "args": { + "External id": 976704,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937144029.581, "dur": 2.880, + "args": { + "External id": 976705,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937144034.565, "dur": 0.686, + "args": { + "External id": 976706,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937144089.645, "dur": 16.527, + "args": { + "External id": 976707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937144107.634, "dur": 11.925, + "args": { + "External id": 976708,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937144133.306, "dur": 2.317, + "args": { + "External id": 976709,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937144146.238, "dur": 3.975, + "args": { + "External id": 976710,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937144148.689, "dur": 0.643, + "args": { + "External id": 976711,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937144225.481, "dur": 62.455, + "args": { + "External id": 976712,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937144293.606, "dur": 4.539, + "args": { + "External id": 976713,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937144296.084, "dur": 0.859, + "args": { + "External id": 976714,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937144299.830, "dur": 28.766, + "args": { + "External id": 976715,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937144333.875, "dur": 7.298, + "args": { + "External id": 976716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937144335.351, "dur": 4.898, + "args": { + "External id": 976717,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937144339.196, "dur": 0.817, + "args": { + "External id": 976718,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937144343.866, "dur": 42.560, + "args": { + "External id": 976719,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937144344.916, "dur": 40.856, + "args": { + "External id": 976720,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937144390.377, "dur": 15.501, + "args": { + "External id": 976721,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937144411.379, "dur": 3.258, + "args": { + "External id": 976722,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937144413.289, "dur": 0.451, + "args": { + "External id": 976723,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937144420.907, "dur": 48.706, + "args": { + "External id": 976724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937144421.730, "dur": 5.926, + "args": { + "External id": 976725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937144422.351, "dur": 4.705, + "args": { + "External id": 976726,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937144423.591, "dur": 3.270, + "args": { + "External id": 976727,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937144428.446, "dur": 40.746, + "args": { + "External id": 976728,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937144428.988, "dur": 39.579, + "args": { + "External id": 976729,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937144473.810, "dur": 3.637, + "args": { + "External id": 976730,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937144475.506, "dur": 0.503, + "args": { + "External id": 976731,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937144485.853, "dur": 1.298, + "args": { + "External id": 976732,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937144495.332, "dur": 6.002, + "args": { + "External id": 976733,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937144497.155, "dur": 3.882, + "args": { + "External id": 976734,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937144585.754, "dur": 188.986, + "args": { + "External id": 976735,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937144588.094, "dur": 2.181, + "args": { + "External id": 976736,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937144591.613, "dur": 182.630, + "args": { + "External id": 976737,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937144595.385, "dur": 0.354, + "args": { + "External id": 976738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937144600.506, "dur": 21.333, + "args": { + "External id": 976739,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937144623.295, "dur": 2.614, + "args": { + "External id": 976740,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937144625.129, "dur": 0.610, + "args": { + "External id": 976741,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937144626.803, "dur": 23.510, + "args": { + "External id": 976742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937144627.800, "dur": 3.773, + "args": { + "External id": 976743,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937144632.824, "dur": 17.074, + "args": { + "External id": 976744,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937144635.029, "dur": 2.724, + "args": { + "External id": 976745,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937144651.819, "dur": 22.552, + "args": { + "External id": 976746,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937144675.673, "dur": 12.778, + "args": { + "External id": 976747,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937144693.645, "dur": 13.802, + "args": { + "External id": 976748,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937144708.693, "dur": 11.487, + "args": { + "External id": 976749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937144721.907, "dur": 20.687, + "args": { + "External id": 976750,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937144723.604, "dur": 1.603, + "args": { + "External id": 976751,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937144727.043, "dur": 0.720, + "args": { + "External id": 976752,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937144743.977, "dur": 13.592, + "args": { + "External id": 976753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937144760.994, "dur": 12.087, + "args": { + "External id": 976754,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937144781.042, "dur": 1.604, + "args": { + "External id": 976755,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937144791.007, "dur": 3.187, + "args": { + "External id": 976756,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937144792.989, "dur": 0.426, + "args": { + "External id": 976757,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937144854.909, "dur": 51.365, + "args": { + "External id": 976758,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937144910.967, "dur": 4.330, + "args": { + "External id": 976759,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937144913.367, "dur": 0.785, + "args": { + "External id": 976760,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937144916.808, "dur": 26.307, + "args": { + "External id": 976761,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937144949.951, "dur": 7.076, + "args": { + "External id": 976762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937144951.334, "dur": 5.000, + "args": { + "External id": 976763,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937144952.925, "dur": 3.216, + "args": { + "External id": 976764,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937144959.413, "dur": 39.920, + "args": { + "External id": 976765,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937144960.130, "dur": 38.631, + "args": { + "External id": 976766,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145003.032, "dur": 35.144, + "args": { + "External id": 976767,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937145046.119, "dur": 40.561, + "args": { + "External id": 976768,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145051.109, "dur": 0.842, + "args": { + "External id": 976769,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937145093.424, "dur": 59.412, + "args": { + "External id": 976770,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937145094.332, "dur": 3.935, + "args": { + "External id": 976771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937145095.288, "dur": 2.447, + "args": { + "External id": 976772,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145096.872, "dur": 0.737, + "args": { + "External id": 976773,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937145098.943, "dur": 53.292, + "args": { + "External id": 976774,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937145102.554, "dur": 49.013, + "args": { + "External id": 976775,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937145157.789, "dur": 4.163, + "args": { + "External id": 976776,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145160.034, "dur": 0.695, + "args": { + "External id": 976777,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937145169.205, "dur": 1.585, + "args": { + "External id": 976778,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937145178.841, "dur": 12.833, + "args": { + "External id": 976779,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937145183.875, "dur": 7.486, + "args": { + "External id": 976780,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937145302.896, "dur": 184.302, + "args": { + "External id": 976781,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937145305.106, "dur": 2.417, + "args": { + "External id": 976782,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937145311.601, "dur": 175.098, + "args": { + "External id": 976783,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937145312.982, "dur": 0.353, + "args": { + "External id": 976784,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937145314.474, "dur": 24.998, + "args": { + "External id": 976785,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937145341.168, "dur": 5.244, + "args": { + "External id": 976786,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145342.997, "dur": 3.171, + "args": { + "External id": 976787,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937145350.243, "dur": 21.665, + "args": { + "External id": 976788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937145351.325, "dur": 1.287, + "args": { + "External id": 976789,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937145353.812, "dur": 17.816, + "args": { + "External id": 976790,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145356.539, "dur": 2.463, + "args": { + "External id": 976791,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937145373.175, "dur": 19.496, + "args": { + "External id": 976792,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145394.143, "dur": 13.632, + "args": { + "External id": 976793,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937145410.172, "dur": 12.921, + "args": { + "External id": 976794,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145424.411, "dur": 11.719, + "args": { + "External id": 976795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937145437.809, "dur": 23.412, + "args": { + "External id": 976796,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145439.410, "dur": 1.738, + "args": { + "External id": 976797,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145445.307, "dur": 2.973, + "args": { + "External id": 976798,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145462.770, "dur": 11.034, + "args": { + "External id": 976799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145474.839, "dur": 10.853, + "args": { + "External id": 976800,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937145493.256, "dur": 1.847, + "args": { + "External id": 976801,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937145504.133, "dur": 3.568, + "args": { + "External id": 976802,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145506.627, "dur": 0.341, + "args": { + "External id": 976803,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937145570.529, "dur": 54.478, + "args": { + "External id": 976804,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937145630.129, "dur": 6.740, + "args": { + "External id": 976805,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145635.038, "dur": 0.704, + "args": { + "External id": 976806,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145638.383, "dur": 28.624, + "args": { + "External id": 976807,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937145671.310, "dur": 5.591, + "args": { + "External id": 976808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937145673.063, "dur": 3.107, + "args": { + "External id": 976809,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145674.759, "dur": 1.248, + "args": { + "External id": 976810,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937145679.784, "dur": 42.113, + "args": { + "External id": 976811,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937145683.212, "dur": 38.112, + "args": { + "External id": 976812,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145725.781, "dur": 14.644, + "args": { + "External id": 976813,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937145745.183, "dur": 22.741, + "args": { + "External id": 976814,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937145747.422, "dur": 20.128, + "args": { + "External id": 976815,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145752.929, "dur": 0.682, + "args": { + "External id": 976816,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345937145773.270, "dur": 24.845, + "args": { + "External id": 976817,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345937145774.882, "dur": 23.019, + "args": { + "External id": 976818,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 15862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145779.283, "dur": 3.647, + "args": { + "External id": 976819,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145783.899, "dur": 13.482, + "args": { + "External id": 976820,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937145810.657, "dur": 4.999, + "args": { + "External id": 976821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937145812.621, "dur": 2.749, + "args": { + "External id": 976822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937145816.637, "dur": 1.158, + "args": { + "External id": 976823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937145817.119, "dur": 0.598, + "args": { + "External id": 976824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145866.298, "dur": 21.291, + "args": { + "External id": 976825,"Sequence number": 10552458, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145891.570, "dur": 12.734, + "args": { + "External id": 976826,"Sequence number": 10552459, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 15870 + } + }, + { + "ph": "s", "id": 9, "pid": 2338708, "tid": 2338708, "ts": 6345937145891.570, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937145909.737, "dur": 5.737, + "args": { + "External id": 976827,"Sequence number": 10552460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 15871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145913.141, "dur": 0.989, + "args": { + "External id": 976828,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345937145918.001, "dur": 5.622, + "args": { + "External id": 976829,"Sequence number": 10552460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 15873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145921.940, "dur": 0.424, + "args": { + "External id": 976830,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 15874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937145924.805, "dur": 4.849, + "args": { + "External id": 976831,"Sequence number": 10552460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 15875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145928.802, "dur": 0.295, + "args": { + "External id": 976832,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 15876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937145933.635, "dur": 4.933, + "args": { + "External id": 976833,"Sequence number": 10552460, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15877 + } + }, + { + "ph": "s", "id": 8, "pid": 2338708, "tid": 2338708, "ts": 6345937145933.635, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145936.568, "dur": 0.826, + "args": { + "External id": 976834,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937145939.556, "dur": 3.991, + "args": { + "External id": 976835,"Sequence number": 10552461, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 15879 + } + }, + { + "ph": "s", "id": 7, "pid": 2338708, "tid": 2338708, "ts": 6345937145939.556, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145942.551, "dur": 0.273, + "args": { + "External id": 976836,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345937145944.465, "dur": 13.850, + "args": { + "External id": 976837,"Sequence number": 10552462, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 15881 + } + }, + { + "ph": "s", "id": 6, "pid": 2338708, "tid": 2338708, "ts": 6345937145944.465, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145954.883, "dur": 2.581, + "args": { + "External id": 976838,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 15882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937145959.425, "dur": 6.116, + "args": { + "External id": 976839,"Sequence number": 10552463, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 15883 + } + }, + { + "ph": "s", "id": 5, "pid": 2338708, "tid": 2338708, "ts": 6345937145959.425, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937145961.898, "dur": 2.891, + "args": { + "External id": 976840,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 15884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345937145969.202, "dur": 28.479, + "args": { + "External id": 976841,"Sequence number": 10552464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345937145970.588, "dur": 26.850, + "args": { + "External id": 976842,"Sequence number": 10552464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937145973.173, "dur": 6.375, + "args": { + "External id": 976843,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 15887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937145975.257, "dur": 3.538, + "args": { + "External id": 976844,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937145980.357, "dur": 16.615, + "args": { + "External id": 976845,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 15889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937146049.511, "dur": 39.675, + "args": { + "External id": 976846,"Sequence number": 10552464, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 15890 + } + }, + { + "ph": "s", "id": 4, "pid": 2338708, "tid": 2338708, "ts": 6345937146049.511, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937146093.962, "dur": 1.358, + "args": { + "External id": 976847,"Sequence number": 10552465, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 15891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345937146130.799, "dur": 44037.144, + "args": { + "External id": 976848,"Sequence number": 10552465, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 15892 + } + }, + { + "ph": "s", "id": 3, "pid": 2338708, "tid": 2338708, "ts": 6345937146130.799, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345937146148.230, "dur": 38.491, + "args": { + "External id": 976849,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345937146149.090, "dur": 37.420, + "args": { + "External id": 976850,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937146150.678, "dur": 12.586, + "args": { + "External id": 976851,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937146155.410, "dur": 7.304, + "args": { + "External id": 976852,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937146163.904, "dur": 22.134, + "args": { + "External id": 976853,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 15897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937146204.936, "dur": 26.656, + "args": { + "External id": 976854,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937146206.344, "dur": 6.394, + "args": { + "External id": 976855,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 15899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937146208.226, "dur": 4.233, + "args": { + "External id": 976856,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345937146214.133, "dur": 17.238, + "args": { + "External id": 976857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 15901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937146215.887, "dur": 15.140, + "args": { + "External id": 976858,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 15902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937146240.438, "dur": 19.955, + "args": { + "External id": 976859,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937146241.047, "dur": 4.472, + "args": { + "External id": 976860,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 15904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937146242.401, "dur": 2.876, + "args": { + "External id": 976861,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345937146248.365, "dur": 11.832, + "args": { + "External id": 976862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937146248.785, "dur": 11.127, + "args": { + "External id": 976863,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 15907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345937146266.323, "dur": 21.444, + "args": { + "External id": 976864,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 15908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937146267.893, "dur": 7.644, + "args": { + "External id": 976865,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345937146276.359, "dur": 11.091, + "args": { + "External id": 976866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 15910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937146276.929, "dur": 10.246, + "args": { + "External id": 976867,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2338708, + "ts": 6345937146292.416, "dur": 21.602, + "args": { + "External id": 976868,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937146316.716, "dur": 50.854, + "args": { + "External id": 976869,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 15913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937146321.118, "dur": 46.047, + "args": { + "External id": 976870,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937146325.623, "dur": 1.021, + "args": { + "External id": 976871,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 15915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345937146327.877, "dur": 23.866, + "args": { + "External id": 976872,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345937146329.526, "dur": 21.989, + "args": { + "External id": 976873,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 15917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937146334.060, "dur": 2.938, + "args": { + "External id": 976874,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937146337.761, "dur": 13.430, + "args": { + "External id": 976875,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 15919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2338708, + "ts": 6345937146371.657, "dur": 37747.581, + "args": { + "External id": 976876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2338708, + "ts": 6345937146373.199, "dur": 37744.207, + "args": { + "External id": 976877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937184135.130, "dur": 8.994, + "args": { + "External id": 976878,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937184140.886, "dur": 1.015, + "args": { + "External id": 976879,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937184152.532, "dur": 114.107, + "args": { + "External id": 976880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937184154.214, "dur": 6.942, + "args": { + "External id": 976881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937184156.776, "dur": 3.071, + "args": { + "External id": 976882,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937184158.864, "dur": 0.728, + "args": { + "External id": 976883,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937184162.499, "dur": 103.290, + "args": { + "External id": 976884,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937184164.614, "dur": 100.423, + "args": { + "External id": 976885,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937184270.742, "dur": 4.350, + "args": { + "External id": 976886,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937184273.140, "dur": 0.582, + "args": { + "External id": 976887,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937184287.081, "dur": 2.611, + "args": { + "External id": 976888,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937184300.011, "dur": 7.198, + "args": { + "External id": 976889,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937184302.192, "dur": 4.711, + "args": { + "External id": 976890,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937184452.358, "dur": 237.345, + "args": { + "External id": 976891,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937184456.005, "dur": 4.190, + "args": { + "External id": 976892,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937184464.488, "dur": 224.443, + "args": { + "External id": 976893,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937184469.525, "dur": 0.520, + "args": { + "External id": 976894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937184471.478, "dur": 26.519, + "args": { + "External id": 976895,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937184499.734, "dur": 29.381, + "args": { + "External id": 976896,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937184525.317, "dur": 3.435, + "args": { + "External id": 976897,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937184530.433, "dur": 24.089, + "args": { + "External id": 976898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937184531.451, "dur": 1.437, + "args": { + "External id": 976899,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937184534.218, "dur": 20.005, + "args": { + "External id": 976900,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937184538.120, "dur": 3.417, + "args": { + "External id": 976901,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937184559.202, "dur": 22.430, + "args": { + "External id": 976902,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937184583.521, "dur": 15.114, + "args": { + "External id": 976903,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937184601.932, "dur": 15.027, + "args": { + "External id": 976904,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937184618.546, "dur": 12.522, + "args": { + "External id": 976905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937184632.999, "dur": 24.163, + "args": { + "External id": 976906,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937184635.096, "dur": 1.980, + "args": { + "External id": 976907,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937184639.189, "dur": 3.172, + "args": { + "External id": 976908,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937184660.906, "dur": 13.592, + "args": { + "External id": 976909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937184675.817, "dur": 11.994, + "args": { + "External id": 976910,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937184697.921, "dur": 2.106, + "args": { + "External id": 976911,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937184708.289, "dur": 3.566, + "args": { + "External id": 976912,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937184710.604, "dur": 0.339, + "args": { + "External id": 976913,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937184794.834, "dur": 73.438, + "args": { + "External id": 976914,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 15958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937184874.288, "dur": 8.810, + "args": { + "External id": 976915,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937184877.008, "dur": 0.993, + "args": { + "External id": 976916,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937184887.546, "dur": 31.098, + "args": { + "External id": 976917,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 15961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937184924.380, "dur": 6.625, + "args": { + "External id": 976918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 15962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937184926.083, "dur": 3.876, + "args": { + "External id": 976919,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 15963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937184928.117, "dur": 1.549, + "args": { + "External id": 976920,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 15964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937184934.382, "dur": 47.933, + "args": { + "External id": 976921,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937184935.710, "dur": 45.818, + "args": { + "External id": 976922,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 15966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937184987.017, "dur": 17.656, + "args": { + "External id": 976923,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 15967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937185034.066, "dur": 5.161, + "args": { + "External id": 976924,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 15968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937185036.786, "dur": 0.905, + "args": { + "External id": 976925,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 15969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937185045.051, "dur": 116.124, + "args": { + "External id": 976926,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 15970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937185046.200, "dur": 6.332, + "args": { + "External id": 976927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 15971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937185047.054, "dur": 4.830, + "args": { + "External id": 976928,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 15972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937185048.235, "dur": 3.497, + "args": { + "External id": 976929,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 15973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937185103.374, "dur": 57.228, + "args": { + "External id": 976930,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937185104.661, "dur": 55.076, + "args": { + "External id": 976931,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 15975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937185168.836, "dur": 4.891, + "args": { + "External id": 976932,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 15976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937185171.331, "dur": 0.827, + "args": { + "External id": 976933,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 15977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937185182.181, "dur": 2.133, + "args": { + "External id": 976934,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 15978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937185194.735, "dur": 10.146, + "args": { + "External id": 976935,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 15979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937185199.301, "dur": 5.251, + "args": { + "External id": 976936,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937185320.084, "dur": 218.165, + "args": { + "External id": 976937,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937185325.691, "dur": 1.961, + "args": { + "External id": 976938,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937185329.338, "dur": 208.194, + "args": { + "External id": 976939,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 15983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937185331.152, "dur": 0.553, + "args": { + "External id": 976940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937185333.237, "dur": 25.951, + "args": { + "External id": 976941,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 15985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937185361.101, "dur": 5.684, + "args": { + "External id": 976942,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 15986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937185365.484, "dur": 0.980, + "args": { + "External id": 976943,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 15987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937185370.871, "dur": 26.632, + "args": { + "External id": 976944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937185372.135, "dur": 1.031, + "args": { + "External id": 976945,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 15989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937185374.688, "dur": 22.527, + "args": { + "External id": 976946,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 15990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937185377.704, "dur": 2.248, + "args": { + "External id": 976947,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937185399.198, "dur": 26.897, + "args": { + "External id": 976948,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 15992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937185427.937, "dur": 15.641, + "args": { + "External id": 976949,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 15993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937185446.796, "dur": 15.842, + "args": { + "External id": 976950,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 15994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937185464.292, "dur": 14.839, + "args": { + "External id": 976951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 15995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937185480.945, "dur": 27.176, + "args": { + "External id": 976952,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 15996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937185485.686, "dur": 1.993, + "args": { + "External id": 976953,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 15997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937185492.322, "dur": 0.539, + "args": { + "External id": 976954,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 15998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937185509.822, "dur": 13.293, + "args": { + "External id": 976955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 15999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937185524.325, "dur": 12.239, + "args": { + "External id": 976956,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937185545.610, "dur": 2.172, + "args": { + "External id": 976957,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937185558.348, "dur": 3.985, + "args": { + "External id": 976958,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937185560.719, "dur": 0.713, + "args": { + "External id": 976959,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937185634.413, "dur": 57.211, + "args": { + "External id": 976960,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937185696.844, "dur": 7.210, + "args": { + "External id": 976961,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937185702.186, "dur": 0.716, + "args": { + "External id": 976962,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937185705.761, "dur": 30.615, + "args": { + "External id": 976963,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937185741.457, "dur": 5.776, + "args": { + "External id": 976964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937185743.021, "dur": 3.468, + "args": { + "External id": 976965,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937185744.966, "dur": 1.257, + "args": { + "External id": 976966,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937185760.864, "dur": 46.753, + "args": { + "External id": 976967,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937185764.446, "dur": 42.716, + "args": { + "External id": 976968,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937185811.948, "dur": 16.483, + "args": { + "External id": 976969,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937185834.595, "dur": 3.594, + "args": { + "External id": 976970,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937185836.637, "dur": 0.598, + "args": { + "External id": 976971,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937185842.663, "dur": 49.602, + "args": { + "External id": 976972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937185843.808, "dur": 6.112, + "args": { + "External id": 976973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937185844.722, "dur": 4.557, + "args": { + "External id": 976974,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937185848.504, "dur": 0.642, + "args": { + "External id": 976975,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937185850.839, "dur": 41.035, + "args": { + "External id": 976976,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937185851.660, "dur": 39.628, + "args": { + "External id": 976977,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937185896.988, "dur": 3.298, + "args": { + "External id": 976978,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937185898.635, "dur": 0.613, + "args": { + "External id": 976979,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937185906.135, "dur": 1.238, + "args": { + "External id": 976980,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937185915.277, "dur": 13.144, + "args": { + "External id": 976981,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937185919.546, "dur": 8.487, + "args": { + "External id": 976982,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937186045.030, "dur": 289.266, + "args": { + "External id": 976983,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937186047.967, "dur": 3.128, + "args": { + "External id": 976984,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937186099.444, "dur": 234.042, + "args": { + "External id": 976985,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937186101.982, "dur": 0.530, + "args": { + "External id": 976986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937186104.186, "dur": 40.643, + "args": { + "External id": 976987,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937186146.518, "dur": 3.278, + "args": { + "External id": 976988,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937186148.400, "dur": 1.127, + "args": { + "External id": 976989,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937186153.278, "dur": 32.018, + "args": { + "External id": 976990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937186154.633, "dur": 1.802, + "args": { + "External id": 976991,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937186157.882, "dur": 27.086, + "args": { + "External id": 976992,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186163.709, "dur": 3.065, + "args": { + "External id": 976993,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937186187.038, "dur": 41.815, + "args": { + "External id": 976994,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186230.202, "dur": 17.142, + "args": { + "External id": 976995,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937186250.341, "dur": 17.794, + "args": { + "External id": 976996,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186269.540, "dur": 13.195, + "args": { + "External id": 976997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937186284.780, "dur": 22.599, + "args": { + "External id": 976998,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186289.190, "dur": 2.206, + "args": { + "External id": 976999,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937186293.439, "dur": 0.642, + "args": { + "External id": 977000,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186308.812, "dur": 11.524, + "args": { + "External id": 977001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186321.697, "dur": 10.568, + "args": { + "External id": 977002,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937186344.146, "dur": 2.836, + "args": { + "External id": 977003,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937186357.021, "dur": 3.949, + "args": { + "External id": 977004,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937186359.634, "dur": 0.461, + "args": { + "External id": 977005,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937186438.706, "dur": 61.648, + "args": { + "External id": 977006,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937186505.917, "dur": 3.944, + "args": { + "External id": 977007,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937186508.165, "dur": 0.503, + "args": { + "External id": 977008,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186511.826, "dur": 27.534, + "args": { + "External id": 977009,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937186544.362, "dur": 7.946, + "args": { + "External id": 977010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937186545.960, "dur": 5.599, + "args": { + "External id": 977011,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937186550.542, "dur": 0.841, + "args": { + "External id": 977012,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937186555.190, "dur": 42.560, + "args": { + "External id": 977013,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937186556.394, "dur": 40.690, + "args": { + "External id": 977014,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186601.508, "dur": 14.980, + "args": { + "External id": 977015,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937186622.250, "dur": 8.565, + "args": { + "External id": 977016,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937186629.264, "dur": 0.651, + "args": { + "External id": 977017,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937186635.260, "dur": 46.002, + "args": { + "External id": 977018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937186638.917, "dur": 3.039, + "args": { + "External id": 977019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937186639.580, "dur": 1.848, + "args": { + "External id": 977020,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937186640.795, "dur": 0.499, + "args": { + "External id": 977021,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937186642.662, "dur": 38.257, + "args": { + "External id": 977022,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937186643.271, "dur": 36.960, + "args": { + "External id": 977023,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937186685.143, "dur": 6.393, + "args": { + "External id": 977024,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937186687.011, "dur": 3.205, + "args": { + "External id": 977025,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937186699.346, "dur": 1.387, + "args": { + "External id": 977026,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937186708.964, "dur": 9.642, + "args": { + "External id": 977027,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937186711.158, "dur": 7.171, + "args": { + "External id": 977028,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937186806.232, "dur": 189.339, + "args": { + "External id": 977029,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937186808.585, "dur": 1.919, + "args": { + "External id": 977030,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937186815.342, "dur": 179.687, + "args": { + "External id": 977031,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937186816.722, "dur": 0.377, + "args": { + "External id": 977032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937186818.189, "dur": 22.355, + "args": { + "External id": 977033,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937186841.972, "dur": 2.770, + "args": { + "External id": 977034,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937186843.891, "dur": 0.518, + "args": { + "External id": 977035,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937186845.715, "dur": 21.667, + "args": { + "External id": 977036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937186846.640, "dur": 1.382, + "args": { + "External id": 977037,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937186849.230, "dur": 17.852, + "args": { + "External id": 977038,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186854.324, "dur": 2.224, + "args": { + "External id": 977039,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937186868.623, "dur": 22.370, + "args": { + "External id": 977040,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186892.237, "dur": 13.390, + "args": { + "External id": 977041,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937186911.697, "dur": 14.667, + "args": { + "External id": 977042,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186927.522, "dur": 13.764, + "args": { + "External id": 977043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937186943.097, "dur": 20.698, + "args": { + "External id": 977044,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186944.882, "dur": 1.827, + "args": { + "External id": 977045,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937186948.356, "dur": 0.707, + "args": { + "External id": 977046,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186965.191, "dur": 13.445, + "args": { + "External id": 977047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937186981.969, "dur": 11.859, + "args": { + "External id": 977048,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937187001.802, "dur": 1.644, + "args": { + "External id": 977049,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937187030.264, "dur": 4.517, + "args": { + "External id": 977050,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187032.722, "dur": 0.802, + "args": { + "External id": 977051,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937187144.463, "dur": 61.633, + "args": { + "External id": 977052,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937187212.130, "dur": 5.847, + "args": { + "External id": 977053,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187215.424, "dur": 1.024, + "args": { + "External id": 977054,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937187219.917, "dur": 28.294, + "args": { + "External id": 977055,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937187253.064, "dur": 8.062, + "args": { + "External id": 977056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937187257.132, "dur": 3.024, + "args": { + "External id": 977057,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187259.017, "dur": 0.932, + "args": { + "External id": 977058,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937187263.854, "dur": 41.830, + "args": { + "External id": 977059,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937187264.815, "dur": 40.223, + "args": { + "External id": 977060,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937187309.731, "dur": 17.924, + "args": { + "External id": 977061,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937187333.121, "dur": 7.164, + "args": { + "External id": 977062,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187338.709, "dur": 0.639, + "args": { + "External id": 977063,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937187344.632, "dur": 50.192, + "args": { + "External id": 977064,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937187345.848, "dur": 6.403, + "args": { + "External id": 977065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937187346.837, "dur": 4.669, + "args": { + "External id": 977066,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187350.626, "dur": 0.770, + "args": { + "External id": 977067,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937187353.120, "dur": 41.380, + "args": { + "External id": 977068,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937187354.055, "dur": 39.958, + "args": { + "External id": 977069,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937187399.758, "dur": 3.932, + "args": { + "External id": 977070,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187401.631, "dur": 0.701, + "args": { + "External id": 977071,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937187412.438, "dur": 1.557, + "args": { + "External id": 977072,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937187422.053, "dur": 6.191, + "args": { + "External id": 977073,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937187423.806, "dur": 4.181, + "args": { + "External id": 977074,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937187517.734, "dur": 196.182, + "args": { + "External id": 977075,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937187520.287, "dur": 2.105, + "args": { + "External id": 977076,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937187526.358, "dur": 186.951, + "args": { + "External id": 977077,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937187530.138, "dur": 0.313, + "args": { + "External id": 977078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937187531.914, "dur": 21.629, + "args": { + "External id": 977079,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937187555.448, "dur": 2.854, + "args": { + "External id": 977080,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187557.253, "dur": 0.652, + "args": { + "External id": 977081,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937187559.130, "dur": 24.721, + "args": { + "External id": 977082,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937187563.131, "dur": 1.120, + "args": { + "External id": 977083,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937187565.361, "dur": 18.209, + "args": { + "External id": 977084,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937187568.111, "dur": 3.028, + "args": { + "External id": 977085,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937187587.981, "dur": 23.901, + "args": { + "External id": 977086,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937187613.434, "dur": 13.659, + "args": { + "External id": 977087,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937187629.966, "dur": 13.047, + "args": { + "External id": 977088,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937187644.451, "dur": 12.374, + "args": { + "External id": 977089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937187658.659, "dur": 25.187, + "args": { + "External id": 977090,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937187660.893, "dur": 1.952, + "args": { + "External id": 977091,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187665.084, "dur": 0.954, + "args": { + "External id": 977092,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937187687.222, "dur": 13.051, + "args": { + "External id": 977093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937187701.491, "dur": 10.682, + "args": { + "External id": 977094,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937187721.911, "dur": 1.700, + "args": { + "External id": 977095,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937187732.617, "dur": 3.808, + "args": { + "External id": 977096,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187735.093, "dur": 0.570, + "args": { + "External id": 977097,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937187807.289, "dur": 53.866, + "args": { + "External id": 977098,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937187866.153, "dur": 4.224, + "args": { + "External id": 977099,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187868.462, "dur": 0.783, + "args": { + "External id": 977100,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937187874.277, "dur": 25.956, + "args": { + "External id": 977101,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937187904.697, "dur": 7.962, + "args": { + "External id": 977102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937187906.026, "dur": 5.895, + "args": { + "External id": 977103,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187907.958, "dur": 3.732, + "args": { + "External id": 977104,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937187914.996, "dur": 41.664, + "args": { + "External id": 977105,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937187915.902, "dur": 40.108, + "args": { + "External id": 977106,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937187960.473, "dur": 13.556, + "args": { + "External id": 977107,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937187982.786, "dur": 3.339, + "args": { + "External id": 977108,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187984.662, "dur": 0.574, + "args": { + "External id": 977109,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937187989.912, "dur": 103.462, + "args": { + "External id": 977110,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937187990.875, "dur": 3.207, + "args": { + "External id": 977111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937187991.788, "dur": 1.797, + "args": { + "External id": 977112,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937187992.878, "dur": 0.593, + "args": { + "External id": 977113,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937187997.386, "dur": 95.335, + "args": { + "External id": 977114,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937187998.006, "dur": 93.585, + "args": { + "External id": 977115,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937188100.127, "dur": 4.989, + "args": { + "External id": 977116,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937188102.827, "dur": 0.685, + "args": { + "External id": 977117,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937188112.678, "dur": 1.885, + "args": { + "External id": 977118,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937188123.494, "dur": 12.956, + "args": { + "External id": 977119,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937188128.345, "dur": 7.828, + "args": { + "External id": 977120,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937188232.458, "dur": 202.427, + "args": { + "External id": 977121,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937188234.781, "dur": 2.044, + "args": { + "External id": 977122,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937188241.292, "dur": 192.846, + "args": { + "External id": 977123,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937188242.677, "dur": 0.380, + "args": { + "External id": 977124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937188244.446, "dur": 24.674, + "args": { + "External id": 977125,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937188270.640, "dur": 5.447, + "args": { + "External id": 977126,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937188272.516, "dur": 3.205, + "args": { + "External id": 977127,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937188279.767, "dur": 22.329, + "args": { + "External id": 977128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937188280.833, "dur": 1.194, + "args": { + "External id": 977129,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937188283.499, "dur": 18.226, + "args": { + "External id": 977130,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937188286.254, "dur": 2.420, + "args": { + "External id": 977131,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937188303.845, "dur": 23.120, + "args": { + "External id": 977132,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937188328.472, "dur": 14.332, + "args": { + "External id": 977133,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937188345.482, "dur": 15.821, + "args": { + "External id": 977134,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937188362.528, "dur": 13.584, + "args": { + "External id": 977135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937188377.840, "dur": 26.814, + "args": { + "External id": 977136,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937188379.844, "dur": 2.163, + "args": { + "External id": 977137,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937188386.140, "dur": 3.303, + "args": { + "External id": 977138,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937188406.155, "dur": 13.199, + "args": { + "External id": 977139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937188420.361, "dur": 12.572, + "args": { + "External id": 977140,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937188442.478, "dur": 1.519, + "args": { + "External id": 977141,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937188452.547, "dur": 3.547, + "args": { + "External id": 977142,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937188454.632, "dur": 0.485, + "args": { + "External id": 977143,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937188520.778, "dur": 57.474, + "args": { + "External id": 977144,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937188583.314, "dur": 6.848, + "args": { + "External id": 977145,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937188587.991, "dur": 0.924, + "args": { + "External id": 977146,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937188591.851, "dur": 28.424, + "args": { + "External id": 977147,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937188624.954, "dur": 5.878, + "args": { + "External id": 977148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937188626.699, "dur": 3.449, + "args": { + "External id": 977149,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937188628.535, "dur": 1.442, + "args": { + "External id": 977150,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937188633.584, "dur": 43.182, + "args": { + "External id": 977151,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937188637.463, "dur": 38.607, + "args": { + "External id": 977152,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937188680.571, "dur": 15.850, + "args": { + "External id": 977153,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937188701.498, "dur": 4.485, + "args": { + "External id": 977154,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937188704.541, "dur": 0.504, + "args": { + "External id": 977155,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937188710.026, "dur": 44.775, + "args": { + "External id": 977156,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937188710.948, "dur": 5.715, + "args": { + "External id": 977157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937188711.747, "dur": 4.362, + "args": { + "External id": 977158,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937188715.416, "dur": 0.573, + "args": { + "External id": 977159,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937188717.502, "dur": 36.929, + "args": { + "External id": 977160,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937188718.459, "dur": 35.566, + "args": { + "External id": 977161,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937188759.198, "dur": 6.930, + "args": { + "External id": 977162,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937188761.364, "dur": 3.700, + "args": { + "External id": 977163,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937188771.923, "dur": 1.419, + "args": { + "External id": 977164,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937188780.720, "dur": 8.127, + "args": { + "External id": 977165,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937188785.002, "dur": 3.577, + "args": { + "External id": 977166,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937188866.136, "dur": 238.641, + "args": { + "External id": 977167,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937188871.001, "dur": 1.917, + "args": { + "External id": 977168,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937188874.405, "dur": 229.447, + "args": { + "External id": 977169,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937188875.794, "dur": 0.355, + "args": { + "External id": 977170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937188877.148, "dur": 19.159, + "args": { + "External id": 977171,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937188898.098, "dur": 5.708, + "args": { + "External id": 977172,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937188902.824, "dur": 0.701, + "args": { + "External id": 977173,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937188907.610, "dur": 20.125, + "args": { + "External id": 977174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937188908.778, "dur": 1.428, + "args": { + "External id": 977175,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937188911.295, "dur": 16.161, + "args": { + "External id": 977176,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937188913.345, "dur": 3.170, + "args": { + "External id": 977177,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937188929.132, "dur": 21.682, + "args": { + "External id": 977178,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937188952.158, "dur": 12.148, + "args": { + "External id": 977179,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937188967.242, "dur": 12.248, + "args": { + "External id": 977180,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937188980.734, "dur": 11.390, + "args": { + "External id": 977181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937188993.996, "dur": 45.935, + "args": { + "External id": 977182,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189001.191, "dur": 1.830, + "args": { + "External id": 977183,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937189006.286, "dur": 1.046, + "args": { + "External id": 977184,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189042.491, "dur": 46.498, + "args": { + "External id": 977185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189091.090, "dur": 11.070, + "args": { + "External id": 977186,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937189113.516, "dur": 2.757, + "args": { + "External id": 977187,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937189125.896, "dur": 5.029, + "args": { + "External id": 977188,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937189129.269, "dur": 0.603, + "args": { + "External id": 977189,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937189204.062, "dur": 61.981, + "args": { + "External id": 977190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937189270.921, "dur": 4.889, + "args": { + "External id": 977191,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937189273.934, "dur": 0.743, + "args": { + "External id": 977192,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189277.367, "dur": 27.510, + "args": { + "External id": 977193,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937189309.267, "dur": 8.887, + "args": { + "External id": 977194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937189310.683, "dur": 6.806, + "args": { + "External id": 977195,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937189316.060, "dur": 1.241, + "args": { + "External id": 977196,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937189321.029, "dur": 41.622, + "args": { + "External id": 977197,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937189322.205, "dur": 39.955, + "args": { + "External id": 977198,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189366.518, "dur": 13.917, + "args": { + "External id": 977199,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937189386.428, "dur": 4.375, + "args": { + "External id": 977200,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937189389.257, "dur": 0.674, + "args": { + "External id": 977201,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345937189394.677, "dur": 45.633, + "args": { + "External id": 977202,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 16246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937189398.057, "dur": 3.431, + "args": { + "External id": 977203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937189398.638, "dur": 2.307, + "args": { + "External id": 977204,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 16248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937189400.068, "dur": 0.750, + "args": { + "External id": 977205,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 16249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937189402.028, "dur": 37.915, + "args": { + "External id": 977206,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937189402.823, "dur": 36.614, + "args": { + "External id": 977207,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 16251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937189444.247, "dur": 3.772, + "args": { + "External id": 977208,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937189446.252, "dur": 0.585, + "args": { + "External id": 977209,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937189455.571, "dur": 1.318, + "args": { + "External id": 977210,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 16254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937189464.191, "dur": 6.939, + "args": { + "External id": 977211,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 16255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937189466.730, "dur": 4.155, + "args": { + "External id": 977212,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937189553.045, "dur": 204.103, + "args": { + "External id": 977213,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937189557.070, "dur": 4.975, + "args": { + "External id": 977214,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345937189565.557, "dur": 191.131, + "args": { + "External id": 977215,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 16259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345937189567.095, "dur": 0.549, + "args": { + "External id": 977216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345937189568.864, "dur": 23.490, + "args": { + "External id": 977217,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 16261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345937189593.665, "dur": 5.533, + "args": { + "External id": 977218,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 16262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937189598.304, "dur": 0.603, + "args": { + "External id": 977219,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 16263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937189600.015, "dur": 21.683, + "args": { + "External id": 977220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345937189601.311, "dur": 1.083, + "args": { + "External id": 977221,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345937189603.534, "dur": 17.811, + "args": { + "External id": 977222,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 16266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189605.790, "dur": 2.614, + "args": { + "External id": 977223,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345937189623.008, "dur": 22.621, + "args": { + "External id": 977224,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189647.110, "dur": 16.781, + "args": { + "External id": 977225,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345937189668.962, "dur": 15.411, + "args": { + "External id": 977226,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 16270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189685.523, "dur": 13.096, + "args": { + "External id": 977227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 16271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937189700.252, "dur": 24.350, + "args": { + "External id": 977228,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 16272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189702.571, "dur": 1.583, + "args": { + "External id": 977229,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937189706.372, "dur": 0.638, + "args": { + "External id": 977230,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189726.180, "dur": 13.856, + "args": { + "External id": 977231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 16275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189743.398, "dur": 12.120, + "args": { + "External id": 977232,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 16276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345937189763.839, "dur": 1.799, + "args": { + "External id": 977233,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937189773.137, "dur": 3.625, + "args": { + "External id": 977234,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 16278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937189775.716, "dur": 0.322, + "args": { + "External id": 977235,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937189836.964, "dur": 49.300, + "args": { + "External id": 977236,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 16280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345937189890.788, "dur": 5.362, + "args": { + "External id": 977237,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 16281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937189894.267, "dur": 0.820, + "args": { + "External id": 977238,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 16282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189897.197, "dur": 25.508, + "args": { + "External id": 977239,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 16283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345937189926.782, "dur": 8.130, + "args": { + "External id": 977240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 16284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345937189930.678, "dur": 3.517, + "args": { + "External id": 977241,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 16285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937189932.933, "dur": 1.099, + "args": { + "External id": 977242,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 16286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345937189937.533, "dur": 39.181, + "args": { + "External id": 977243,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345937189938.325, "dur": 37.775, + "args": { + "External id": 977244,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 16288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937189980.006, "dur": 14.728, + "args": { + "External id": 977245,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937189999.031, "dur": 45.997, + "args": { + "External id": 977246,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 16290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345937190001.763, "dur": 42.864, + "args": { + "External id": 977247,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 16291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937190006.875, "dur": 20.788, + "args": { + "External id": 977248,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 16292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345937190051.854, "dur": 67.777, + "args": { + "External id": 977249,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 16293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345937190090.336, "dur": 29.027, + "args": { + "External id": 977250,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 16294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937190096.236, "dur": 5.046, + "args": { + "External id": 977251,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345937190102.617, "dur": 16.154, + "args": { + "External id": 977252,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 16296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937190133.952, "dur": 5.267, + "args": { + "External id": 977253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 16297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937190135.603, "dur": 3.081, + "args": { + "External id": 977254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 16298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937190140.245, "dur": 3.909, + "args": { + "External id": 977255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345937190143.582, "dur": 0.475, + "args": { + "External id": 977256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 16300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937190188.767, "dur": 23.423, + "args": { + "External id": 977257,"Sequence number": 10552466, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 16301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345937190214.123, "dur": 13.243, + "args": { + "External id": 977258,"Sequence number": 10552467, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 16302 + } + }, + { + "ph": "s", "id": 2, "pid": 2338708, "tid": 2338708, "ts": 6345937190214.123, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345937190417.954, "dur": 74.099, + "args": { + "External id": 977259,"Record function id": 0, "Ev Idx": 16303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338708, "tid": 2338708, + "ts": 6345937190639.032, "dur": 37.426, + "args": { + "External id": 977260,"Sequence number": 10552468, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16304 + } + }, + { + "ph": "s", "id": 1, "pid": 2338708, "tid": 2338708, "ts": 6345937190639.032, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937190770.154, "dur": 43.519, + "args": { + "External id": 977261,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 16305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345937190786.985, "dur": 8.590, + "args": { + "External id": 977262,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 16306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345937190790.367, "dur": 4.426, + "args": { + "External id": 977263,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345937190797.280, "dur": 16.067, + "args": { + "External id": 977264,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338708, "tid": 2338708, + "ts": 6345939290985.730, "dur": 147.552, + "args": { + "External id": 977265,"Sequence number": 10552469, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 16309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338708, "tid": 2338708, + "ts": 6345939291153.563, "dur": 29.083, + "args": { + "External id": 977266,"Sequence number": 10552470, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 16310 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2338708, "tid": 2338708, + "ts": 6345939291242.363, "dur": 3620.998, + "args": { + "External id": 977267,"Record function id": 0, "Ev Idx": 16311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939293110.936, "dur": 8.675, + "args": { + "External id": 977268,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338708, "tid": 2338708, + "ts": 6345939293137.195, "dur": 7.803, + "args": { + "External id": 977269,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 16313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939293590.278, "dur": 5.032, + "args": { + "External id": 977270,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338708, "tid": 2338708, + "ts": 6345939293601.081, "dur": 2.870, + "args": { + "External id": 977271,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 16315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939293974.185, "dur": 1.987, + "args": { + "External id": 977272,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2338708, "tid": 2338708, + "ts": 6345939293983.857, "dur": 2.224, + "args": { + "External id": 977273,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[8, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 16317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939295488.459, "dur": 18.587, + "args": { + "External id": 977274,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 16318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939295499.295, "dur": 3.193, + "args": { + "External id": 977275,"Record function id": 0, "Concrete Inputs": ["", "[8, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 16319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939295508.956, "dur": 4.924, + "args": { + "External id": 977276,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], []], "Ev Idx": 16320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939295511.469, "dur": 1.260, + "args": { + "External id": 977277,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 16321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939295553.803, "dur": 13311.173, + "args": { + "External id": 977278,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], [], []], "Ev Idx": 16322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939295560.897, "dur": 13303.146, + "args": { + "External id": 977279,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 16323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939295567.803, "dur": 14.862, + "args": { + "External id": 977280,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939295584.732, "dur": 13277.144, + "args": { + "External id": 977281,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939295595.554, "dur": 0.503, + "args": { + "External id": 977282,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 16326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939295599.313, "dur": 12.368, + "args": { + "External id": 977283,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[8, 4096], [8, 4096]], "Ev Idx": 16327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338708, "tid": 2338708, + "ts": 6345939295602.988, "dur": 8.476, + "args": { + "External id": 977284,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[8, 4096], [], []], "Ev Idx": 16328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939295610.076, "dur": 0.988, + "args": { + "External id": 977285,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345939295613.877, "dur": 155.785, + "args": { + "External id": 977286,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 16330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345939295615.761, "dur": 153.552, + "args": { + "External id": 977287,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 16331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939295617.858, "dur": 8.666, + "args": { + "External id": 977288,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 16332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939295620.494, "dur": 5.549, + "args": { + "External id": 977289,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939295627.346, "dur": 141.545, + "args": { + "External id": 977290,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939295771.315, "dur": 13084.470, + "args": { + "External id": 977291,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939308892.500, "dur": 443.810, + "args": { + "External id": 977292,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 16336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939308894.683, "dur": 441.034, + "args": { + "External id": 977293,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], []], "Ev Idx": 16337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939308901.371, "dur": 12.560, + "args": { + "External id": 977294,"Record function id": 0, "Concrete Inputs": ["[8, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939308915.664, "dur": 418.072, + "args": { + "External id": 977295,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[8, 8192], [8, 8192], []], "Ev Idx": 16339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338708, "tid": 2338708, + "ts": 6345939309365.710, "dur": 56.013, + "args": { + "External id": 977296,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939309371.338, "dur": 8.318, + "args": { + "External id": 977297,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2338708, "tid": 2338708, + "ts": 6345939309382.689, "dur": 38.718, + "args": { + "External id": 977298,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 16342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345939309387.131, "dur": 8.000, + "args": { + "External id": 977299,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 16343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2338708, "tid": 2338708, + "ts": 6345939309437.832, "dur": 72.163, + "args": { + "External id": 977300,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338708, "tid": 2338708, + "ts": 6345939309442.318, "dur": 5.643, + "args": { + "External id": 977301,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 16345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939309445.588, "dur": 2.057, + "args": { + "External id": 977302,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 16346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939309449.577, "dur": 4.066, + "args": { + "External id": 977303,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2338708, + "ts": 6345939309456.716, "dur": 2.396, + "args": { + "External id": 977304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[8, 4096]], "Ev Idx": 16348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338708, "tid": 2338708, + "ts": 6345939309464.350, "dur": 7.275, + "args": { + "External id": 977305,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939309470.587, "dur": 0.901, + "args": { + "External id": 977306,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 16350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338708, "tid": 2338708, + "ts": 6345939309472.927, "dur": 3.218, + "args": { + "External id": 977307,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 16351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939309474.989, "dur": 1.067, + "args": { + "External id": 977308,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[8, 4096, 1], [], [], []], "Ev Idx": 16352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939309478.506, "dur": 4.960, + "args": { + "External id": 977309,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [8, 1, 1, 4096]], "Ev Idx": 16353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2338708, "tid": 2338708, + "ts": 6345939309480.214, "dur": 3.121, + "args": { + "External id": 977310,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 16354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939309482.294, "dur": 0.936, + "args": { + "External id": 977311,"Record function id": 0, "Concrete Inputs": ["", "[8, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 16355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939309484.599, "dur": 24.729, + "args": { + "External id": 977312,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[8, 1, 1, 4096], [8, 1, 1, 4096], []], "Ev Idx": 16356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939309518.713, "dur": 31.102, + "args": { + "External id": 977313,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 16357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939309520.592, "dur": 29.061, + "args": { + "External id": 977314,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], [], []], "Ev Idx": 16358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939309526.694, "dur": 4.141, + "args": { + "External id": 977315,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939309531.711, "dur": 17.359, + "args": { + "External id": 977316,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 16360 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939309721.465, "dur": 184.832, + "args": { + "External id": 977317,"Record function id": 0, "Ev Idx": 16361 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2338708, "tid": 2338708, + "ts": 6345939309827.592, "dur": 67.756, + "args": { + "External id": 977318,"Record function id": 0, "Ev Idx": 16362 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939309916.607, "dur": 58.402, + "args": { + "External id": 977319,"Record function id": 0, "Ev Idx": 16363 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939309986.527, "dur": 14644.191, + "args": { + "External id": 977320,"Record function id": 0, "Ev Idx": 16364 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2338708, "tid": 2338708, + "ts": 6345939309995.214, "dur": 2523.441, + "args": { + "External id": 977321,"Record function id": 0, "Ev Idx": 16365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939310249.640, "dur": 17.276, + "args": { + "External id": 977322,"Record function id": 0, "Concrete Inputs": ["[141824512]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939310299.437, "dur": 210.665, + "args": { + "External id": 977323,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 16367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310308.246, "dur": 2.440, + "args": { + "External id": 977324,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310317.657, "dur": 0.837, + "args": { + "External id": 977325,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "16384000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310321.388, "dur": 0.966, + "args": { + "External id": 977326,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "16384512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310325.042, "dur": 2.966, + "args": { + "External id": 977327,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "18481664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310330.190, "dur": 0.706, + "args": { + "External id": 977328,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "19005952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310333.418, "dur": 0.591, + "args": { + "External id": 977329,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "19530240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310339.540, "dur": 0.341, + "args": { + "External id": 977330,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "21627392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310342.164, "dur": 3.426, + "args": { + "External id": 977331,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "21627904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310347.601, "dur": 0.851, + "args": { + "External id": 977332,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "28967936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310353.369, "dur": 0.518, + "args": { + "External id": 977333,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "36307968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310356.731, "dur": 0.500, + "args": { + "External id": 977334,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "43648000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310359.535, "dur": 3.061, + "args": { + "External id": 977335,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "43648512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310365.646, "dur": 0.726, + "args": { + "External id": 977336,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "45745664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310368.824, "dur": 0.561, + "args": { + "External id": 977337,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "46269952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310373.955, "dur": 0.557, + "args": { + "External id": 977338,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "46794240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310376.809, "dur": 3.234, + "args": { + "External id": 977339,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "48891392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310382.069, "dur": 0.836, + "args": { + "External id": 977340,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "48891904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310388.406, "dur": 0.392, + "args": { + "External id": 977341,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "56231936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310391.441, "dur": 0.886, + "args": { + "External id": 977342,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "63571968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310394.363, "dur": 2.818, + "args": { + "External id": 977343,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "70912000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310399.632, "dur": 0.562, + "args": { + "External id": 977344,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "70912512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310404.570, "dur": 0.342, + "args": { + "External id": 977345,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73009664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310409.842, "dur": 0.381, + "args": { + "External id": 977346,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "73533952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310412.394, "dur": 3.179, + "args": { + "External id": 977347,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "74058240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310418.125, "dur": 0.370, + "args": { + "External id": 977348,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "76155392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310422.852, "dur": 0.363, + "args": { + "External id": 977349,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "76155904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310425.295, "dur": 0.459, + "args": { + "External id": 977350,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "83495936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310428.950, "dur": 3.035, + "args": { + "External id": 977351,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "90835968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310433.815, "dur": 2.902, + "args": { + "External id": 977352,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "98176000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310439.253, "dur": 0.464, + "args": { + "External id": 977353,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "98176512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310444.490, "dur": 0.518, + "args": { + "External id": 977354,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100273664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310447.389, "dur": 2.896, + "args": { + "External id": 977355,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "100797952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310452.348, "dur": 0.478, + "args": { + "External id": 977356,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "101322240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310457.388, "dur": 0.432, + "args": { + "External id": 977357,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "103419392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310460.002, "dur": 0.710, + "args": { + "External id": 977358,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "103419904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310463.635, "dur": 2.817, + "args": { + "External id": 977359,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "110759936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310468.593, "dur": 0.400, + "args": { + "External id": 977360,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "118099968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310471.378, "dur": 0.910, + "args": { + "External id": 977361,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "125440000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310477.547, "dur": 0.818, + "args": { + "External id": 977362,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "125440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939310568.916, "dur": 190.748, + "args": { + "External id": 977363,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939310876.521, "dur": 638.285, + "args": { + "External id": 977364,"Record function id": 0, "Concrete Inputs": ["", "", "141824512", "8", "2", "15", ""], "Input type": ["TensorList", "", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 16408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939310899.380, "dur": 14.135, + "args": { + "External id": 977365,"Record function id": 0, "Concrete Inputs": ["[1134596096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939310923.838, "dur": 20.245, + "args": { + "External id": 977366,"Record function id": 0, "Concrete Inputs": ["", "0", "283649024", "141824512"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 16410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939310932.769, "dur": 10.773, + "args": { + "External id": 977367,"Record function id": 0, "Concrete Inputs": ["", "0", "283649024", "425473536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[1134596096], [], [], [], []], "Ev Idx": 16411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310937.690, "dur": 0.882, + "args": { + "External id": 977368,"Record function id": 0, "Concrete Inputs": ["", "[141824512]", "[1]", "283649024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[1134596096], [], [], []], "Ev Idx": 16412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939310956.803, "dur": 307.677, + "args": { + "External id": 977369,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["c10::BFloat16", "", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[141824512], [], []], "Ev Idx": 16413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310959.914, "dur": 0.934, + "args": { + "External id": 977370,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "283649024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310963.485, "dur": 2.740, + "args": { + "External id": 977371,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "300033024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310968.514, "dur": 0.889, + "args": { + "External id": 977372,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "300033536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310971.543, "dur": 0.838, + "args": { + "External id": 977373,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "302130688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310976.633, "dur": 0.686, + "args": { + "External id": 977374,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "302654976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310979.430, "dur": 0.608, + "args": { + "External id": 977375,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "303179264"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310982.172, "dur": 3.858, + "args": { + "External id": 977376,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "305276416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310990.744, "dur": 0.584, + "args": { + "External id": 977377,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "305276928"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310994.156, "dur": 0.570, + "args": { + "External id": 977378,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "312616960"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939310997.129, "dur": 2.736, + "args": { + "External id": 977379,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "319956992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311002.075, "dur": 0.530, + "args": { + "External id": 977380,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "327297024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311005.152, "dur": 0.671, + "args": { + "External id": 977381,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "327297536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311038.435, "dur": 0.799, + "args": { + "External id": 977382,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "329394688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311042.174, "dur": 0.588, + "args": { + "External id": 977383,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "329918976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311045.491, "dur": 3.501, + "args": { + "External id": 977384,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "330443264"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311053.637, "dur": 0.615, + "args": { + "External id": 977385,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "332540416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311145.941, "dur": 0.899, + "args": { + "External id": 977386,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "332540928"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311150.399, "dur": 3.405, + "args": { + "External id": 977387,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "339880960"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311156.464, "dur": 0.711, + "args": { + "External id": 977388,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "347220992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311159.765, "dur": 0.856, + "args": { + "External id": 977389,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "354561024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311165.688, "dur": 0.676, + "args": { + "External id": 977390,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "354561536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311169.174, "dur": 0.785, + "args": { + "External id": 977391,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "356658688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311172.249, "dur": 3.414, + "args": { + "External id": 977392,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "357182976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311180.410, "dur": 1.415, + "args": { + "External id": 977393,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "357707264"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311184.197, "dur": 0.915, + "args": { + "External id": 977394,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "359804416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311187.376, "dur": 2.601, + "args": { + "External id": 977395,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "359804928"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311191.684, "dur": 0.480, + "args": { + "External id": 977396,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "367144960"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311194.872, "dur": 0.489, + "args": { + "External id": 977397,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "374484992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311199.584, "dur": 0.547, + "args": { + "External id": 977398,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "381825024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311202.074, "dur": 0.553, + "args": { + "External id": 977399,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "381825536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311204.525, "dur": 3.426, + "args": { + "External id": 977400,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "383922688"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311212.410, "dur": 0.581, + "args": { + "External id": 977401,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "384446976"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311214.504, "dur": 0.580, + "args": { + "External id": 977402,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "384971264"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311216.804, "dur": 2.893, + "args": { + "External id": 977403,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "387068416"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311221.358, "dur": 0.807, + "args": { + "External id": 977404,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "387068928"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311223.844, "dur": 0.790, + "args": { + "External id": 977405,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "394408960"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311228.912, "dur": 0.522, + "args": { + "External id": 977406,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "401748992"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311230.915, "dur": 0.504, + "args": { + "External id": 977407,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "409089024"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939311232.852, "dur": 2.987, + "args": { + "External id": 977408,"Record function id": 0, "Concrete Inputs": ["", "[16384000]", "[1]", "409089536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[141824512], [], [], []], "Ev Idx": 16452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939311325.581, "dur": 156.605, + "args": { + "External id": 983553,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 16453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939311632.761, "dur": 685.265, + "args": { + "External id": 983554,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[1134596096], [141824512], [], [], []], "Ev Idx": 16454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939311682.172, "dur": 627.671, + "args": { + "External id": 983555,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1134596096, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[141824512], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16455, "In msg nelems": 141824512 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939311702.408, "dur": 598.005, + "args": { + "External id": 983556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[141824512]], "Ev Idx": 16456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939312364.893, "dur": 3.805, + "args": { + "External id": 983557,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16457, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2338708, "tid": 2338708, + "ts": 6345939312542.082, "dur": 11743.087, + "args": { + "External id": 983558,"Record function id": 0, "Ev Idx": 16458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312885.999, "dur": 9.628, + "args": { + "External id": 983559,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[1134596096], []], "Ev Idx": 16459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312901.922, "dur": 1.711, + "args": { + "External id": 983560,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 16460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312906.552, "dur": 1.561, + "args": { + "External id": 983561,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312911.988, "dur": 1.561, + "args": { + "External id": 983562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312916.782, "dur": 1.653, + "args": { + "External id": 983563,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312920.587, "dur": 1.527, + "args": { + "External id": 983564,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312928.549, "dur": 1.624, + "args": { + "External id": 983565,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312932.848, "dur": 6.941, + "args": { + "External id": 983566,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312943.010, "dur": 1.058, + "args": { + "External id": 983567,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312946.270, "dur": 1.033, + "args": { + "External id": 983568,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312951.856, "dur": 1.259, + "args": { + "External id": 983569,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312955.973, "dur": 1.134, + "args": { + "External id": 983570,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312959.813, "dur": 1.104, + "args": { + "External id": 983571,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312963.142, "dur": 1.184, + "args": { + "External id": 983572,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312969.931, "dur": 1.287, + "args": { + "External id": 983573,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312973.616, "dur": 6.875, + "args": { + "External id": 983574,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312983.289, "dur": 0.974, + "args": { + "External id": 983575,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312986.723, "dur": 1.074, + "args": { + "External id": 983576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939312997.415, "dur": 1.182, + "args": { + "External id": 983577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313001.219, "dur": 1.371, + "args": { + "External id": 983578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313005.086, "dur": 1.385, + "args": { + "External id": 983579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313043.696, "dur": 2.274, + "args": { + "External id": 983580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313052.094, "dur": 1.373, + "args": { + "External id": 983581,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313101.702, "dur": 6.339, + "args": { + "External id": 983582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313112.240, "dur": 1.274, + "args": { + "External id": 983583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313116.170, "dur": 1.225, + "args": { + "External id": 983584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313123.266, "dur": 1.468, + "args": { + "External id": 983585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313129.616, "dur": 1.194, + "args": { + "External id": 983586,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313133.636, "dur": 1.056, + "args": { + "External id": 983587,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313137.203, "dur": 1.396, + "args": { + "External id": 983588,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313144.075, "dur": 1.355, + "args": { + "External id": 983589,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313147.847, "dur": 6.607, + "args": { + "External id": 983590,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313157.035, "dur": 1.343, + "args": { + "External id": 983591,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313161.177, "dur": 1.020, + "args": { + "External id": 983592,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313170.382, "dur": 1.174, + "args": { + "External id": 983593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313174.130, "dur": 1.123, + "args": { + "External id": 983594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313177.567, "dur": 1.310, + "args": { + "External id": 983595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313181.640, "dur": 1.197, + "args": { + "External id": 983596,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313188.153, "dur": 1.130, + "args": { + "External id": 983597,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939313191.999, "dur": 6.198, + "args": { + "External id": 983598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[131072000], []], "Ev Idx": 16498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939313241.873, "dur": 10934.393, + "args": { + "External id": 983599,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 16499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939313276.897, "dur": 10878.387, + "args": { + "External id": 983600,"Record function id": 0, "Concrete Inputs": ["", "", "1", ""], "Input type": ["c10::BFloat16", "", "Scalar", "TensorList"], "Input Strides": [[141824512, 1], [], [], []], "Input Dims": [[8, 141824512], [], [], []], "Ev Idx": 16500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939313310.409, "dur": 23.486, + "args": { + "External id": 983601,"Record function id": 0, "Concrete Inputs": ["[4384]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939313344.677, "dur": 10655.956, + "args": { + "External id": 983602,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], [], []], "Ev Idx": 16502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939313348.109, "dur": 10650.622, + "args": { + "External id": 983603,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[4384], [], [], [], [], [], []], "Ev Idx": 16503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939313356.399, "dur": 9.316, + "args": { + "External id": 983604,"Record function id": 0, "Concrete Inputs": ["[4384]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939313370.820, "dur": 10620.707, + "args": { + "External id": 983605,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4384], [4384], []], "Ev Idx": 16505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939324759.511, "dur": 58.010, + "args": { + "External id": 983606,"Record function id": 0, "Ev Idx": 16506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2338708, "tid": 2338708, + "ts": 6345939324820.120, "dur": 422.792, + "args": { + "External id": 983607,"Record function id": 0, "Ev Idx": 16507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939324894.143, "dur": 333.124, + "args": { + "External id": 983608,"Sequence number": 10552471, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[4096, 1], [4096, 1]], "Input Dims": [[32000, 4096], [8, 4096]], "Ev Idx": 16508 + } + }, + { + "ph": "s", "id": 448, "pid": 2338708, "tid": 2338708, "ts": 6345939324894.143, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939325036.860, "dur": 123.977, + "args": { + "External id": 983609,"kernel_hash": "cljo2nzima3hpaovvfppftdgufxpb4dtilebb6n5aksulaywtrgm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/lj/cljo2nzima3hpaovvfppftdgufxpb4dtilebb6n5aksulaywtrgm.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096], [32000, 4096], [8, 4096, 4096], []], "Ev Idx": 16509 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939325348.834, "dur": 89.539, + "args": { + "External id": 983610,"Record function id": 0, "Ev Idx": 16510 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2338708, "tid": 2338708, + "ts": 6345939325456.021, "dur": 7823.925, + "args": { + "External id": 983611,"Record function id": 0, "Ev Idx": 16511 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2338708, "tid": 2338708, + "ts": 6345939325468.742, "dur": 1216.729, + "args": { + "External id": 983612,"Record function id": 0, "Ev Idx": 16512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939325591.176, "dur": 16.308, + "args": { + "External id": 983613,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939325628.096, "dur": 60.042, + "args": { + "External id": 983614,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325638.788, "dur": 2.806, + "args": { + "External id": 983615,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325644.875, "dur": 0.905, + "args": { + "External id": 983616,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325647.958, "dur": 3.555, + "args": { + "External id": 983617,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325654.198, "dur": 3.238, + "args": { + "External id": 983618,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325659.760, "dur": 0.824, + "args": { + "External id": 983619,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325665.054, "dur": 0.449, + "args": { + "External id": 983620,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325667.014, "dur": 0.765, + "args": { + "External id": 983621,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325670.009, "dur": 0.482, + "args": { + "External id": 983622,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325675.987, "dur": 0.691, + "args": { + "External id": 983623,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939325705.498, "dur": 71.457, + "args": { + "External id": 983624,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939325823.942, "dur": 172.400, + "args": { + "External id": 983625,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939325840.393, "dur": 8.572, + "args": { + "External id": 983626,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939325856.024, "dur": 14.210, + "args": { + "External id": 983627,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939325861.562, "dur": 8.126, + "args": { + "External id": 983628,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325866.500, "dur": 1.025, + "args": { + "External id": 983629,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939325879.915, "dur": 44.386, + "args": { + "External id": 983630,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325882.689, "dur": 3.311, + "args": { + "External id": 983631,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325888.380, "dur": 0.611, + "args": { + "External id": 983632,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325893.135, "dur": 0.711, + "args": { + "External id": 983633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325895.751, "dur": 0.878, + "args": { + "External id": 983634,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325898.022, "dur": 2.474, + "args": { + "External id": 983635,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325902.318, "dur": 0.485, + "args": { + "External id": 983636,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325906.009, "dur": 0.695, + "args": { + "External id": 983637,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325911.114, "dur": 0.408, + "args": { + "External id": 983638,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939325913.080, "dur": 3.109, + "args": { + "External id": 983639,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939325941.121, "dur": 42.173, + "args": { + "External id": 983640,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939326145.021, "dur": 407.023, + "args": { + "External id": 983641,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939326189.962, "dur": 355.089, + "args": { + "External id": 983642,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16542, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939326207.611, "dur": 330.405, + "args": { + "External id": 983643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939326581.568, "dur": 3.185, + "args": { + "External id": 983644,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16544, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2338708, "tid": 2338708, + "ts": 6345939326713.575, "dur": 6244.151, + "args": { + "External id": 983645,"Record function id": 0, "Ev Idx": 16545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939326865.405, "dur": 8.740, + "args": { + "External id": 983646,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939326879.076, "dur": 1.783, + "args": { + "External id": 983647,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939326883.258, "dur": 1.614, + "args": { + "External id": 983648,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939326887.432, "dur": 1.186, + "args": { + "External id": 983649,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939326890.730, "dur": 1.612, + "args": { + "External id": 983650,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939326897.610, "dur": 1.312, + "args": { + "External id": 983651,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939326901.488, "dur": 1.211, + "args": { + "External id": 983652,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939326905.356, "dur": 6.883, + "args": { + "External id": 983653,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939326914.827, "dur": 0.989, + "args": { + "External id": 983654,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939326920.815, "dur": 1.149, + "args": { + "External id": 983655,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939326954.490, "dur": 5934.320, + "args": { + "External id": 983656,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939326975.215, "dur": 5900.726, + "args": { + "External id": 983657,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939326997.931, "dur": 46.034, + "args": { + "External id": 983658,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939327051.684, "dur": 5775.418, + "args": { + "External id": 983659,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939327100.573, "dur": 5725.247, + "args": { + "External id": 983660,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939327114.362, "dur": 10.205, + "args": { + "External id": 983661,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939327127.013, "dur": 5693.750, + "args": { + "External id": 983662,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939333208.171, "dur": 42.026, + "args": { + "External id": 983663,"Sequence number": 10552472, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16563 + } + }, + { + "ph": "s", "id": 447, "pid": 2338708, "tid": 2338708, "ts": 6345939333208.171, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939333232.469, "dur": 12.784, + "args": { + "External id": 983664,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939333237.991, "dur": 6.865, + "args": { + "External id": 983665,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939333328.015, "dur": 156.199, + "args": { + "External id": 983666,"Record function id": 0, "Ev Idx": 16566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939333485.641, "dur": 1321.895, + "args": { + "External id": 983667,"Record function id": 0, "Ev Idx": 16567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939333534.530, "dur": 1256.833, + "args": { + "External id": 983668,"Sequence number": 10552473, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16568 + } + }, + { + "ph": "s", "id": 446, "pid": 2338708, "tid": 2338708, "ts": 6345939333534.530, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939333624.749, "dur": 54.523, + "args": { + "External id": 983669,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939333694.310, "dur": 121.783, + "args": { + "External id": 983670,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939333826.222, "dur": 37.619, + "args": { + "External id": 983671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939333873.599, "dur": 29.612, + "args": { + "External id": 983672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939333935.507, "dur": 35.690, + "args": { + "External id": 983673,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939333995.734, "dur": 45.062, + "args": { + "External id": 983674,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939334109.494, "dur": 188.227, + "args": { + "External id": 983675,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939334198.637, "dur": 18.444, + "args": { + "External id": 983676,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939334204.220, "dur": 11.741, + "args": { + "External id": 983677,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939334220.379, "dur": 4.255, + "args": { + "External id": 983678,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939334225.879, "dur": 1.312, + "args": { + "External id": 983679,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939334230.032, "dur": 4.264, + "args": { + "External id": 983680,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939334309.468, "dur": 56.378, + "args": { + "External id": 983681,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939334403.997, "dur": 39.878, + "args": { + "External id": 983682,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939334454.795, "dur": 43.679, + "args": { + "External id": 983683,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939334509.369, "dur": 34.327, + "args": { + "External id": 983684,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939334572.981, "dur": 32.700, + "args": { + "External id": 983685,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939334613.968, "dur": 42.699, + "args": { + "External id": 983686,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939334676.511, "dur": 22.966, + "args": { + "External id": 983687,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16587 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2338708, "tid": 2338708, + "ts": 6345939334879.187, "dur": 99.964, + "args": { + "External id": 983688,"Record function id": 0, "Ev Idx": 16588 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939335121.575, "dur": 50.372, + "args": { + "External id": 983689,"Record function id": 0, "Ev Idx": 16589 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2338708, "tid": 2338708, + "ts": 6345939335181.417, "dur": 26989.690, + "args": { + "External id": 983690,"Record function id": 0, "Ev Idx": 16590 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2338708, "tid": 2338708, + "ts": 6345939335191.462, "dur": 1155.649, + "args": { + "External id": 983691,"Record function id": 0, "Ev Idx": 16591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939335281.633, "dur": 10.196, + "args": { + "External id": 983692,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939335305.945, "dur": 44.603, + "args": { + "External id": 983693,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335312.676, "dur": 4.355, + "args": { + "External id": 983694,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335319.198, "dur": 0.625, + "args": { + "External id": 983695,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335321.038, "dur": 0.400, + "args": { + "External id": 983696,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335324.866, "dur": 0.510, + "args": { + "External id": 983697,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335326.795, "dur": 0.376, + "args": { + "External id": 983698,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335328.418, "dur": 0.597, + "args": { + "External id": 983699,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335332.669, "dur": 2.869, + "args": { + "External id": 983700,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335336.404, "dur": 0.405, + "args": { + "External id": 983701,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335337.861, "dur": 2.725, + "args": { + "External id": 983702,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939335372.289, "dur": 68.873, + "args": { + "External id": 983703,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939335487.771, "dur": 174.622, + "args": { + "External id": 983704,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939335502.818, "dur": 9.960, + "args": { + "External id": 983705,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939335519.964, "dur": 13.121, + "args": { + "External id": 983706,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939335525.047, "dur": 7.271, + "args": { + "External id": 983707,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335529.322, "dur": 1.082, + "args": { + "External id": 983708,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939335543.061, "dur": 39.565, + "args": { + "External id": 983709,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335545.625, "dur": 0.673, + "args": { + "External id": 983710,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335548.383, "dur": 2.843, + "args": { + "External id": 983711,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335553.085, "dur": 0.608, + "args": { + "External id": 983712,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335555.262, "dur": 2.869, + "args": { + "External id": 983713,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335562.749, "dur": 0.409, + "args": { + "External id": 983714,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335564.879, "dur": 0.469, + "args": { + "External id": 983715,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335567.165, "dur": 0.619, + "args": { + "External id": 983716,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335571.562, "dur": 0.356, + "args": { + "External id": 983717,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939335573.596, "dur": 0.338, + "args": { + "External id": 983718,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939335601.813, "dur": 48.039, + "args": { + "External id": 983719,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939335729.060, "dur": 497.174, + "args": { + "External id": 983720,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939335765.862, "dur": 453.967, + "args": { + "External id": 983721,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16621, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939335775.997, "dur": 436.841, + "args": { + "External id": 983722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939336258.127, "dur": 2.747, + "args": { + "External id": 983723,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16623, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2338708, "tid": 2338708, + "ts": 6345939336370.871, "dur": 25499.842, + "args": { + "External id": 983724,"Record function id": 0, "Ev Idx": 16624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939336499.815, "dur": 7.137, + "args": { + "External id": 983725,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939336510.715, "dur": 1.301, + "args": { + "External id": 983726,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939336513.647, "dur": 3.651, + "args": { + "External id": 983727,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939336519.113, "dur": 1.048, + "args": { + "External id": 983728,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939336521.460, "dur": 1.114, + "args": { + "External id": 983729,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939336526.620, "dur": 1.168, + "args": { + "External id": 983730,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939336529.654, "dur": 1.167, + "args": { + "External id": 983731,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939336531.989, "dur": 3.878, + "args": { + "External id": 983732,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939336537.481, "dur": 0.813, + "args": { + "External id": 983733,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939336542.323, "dur": 0.682, + "args": { + "External id": 983734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939336561.713, "dur": 25235.219, + "args": { + "External id": 983735,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939336578.428, "dur": 25203.503, + "args": { + "External id": 983736,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939336598.562, "dur": 17.062, + "args": { + "External id": 983737,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939336620.847, "dur": 25105.710, + "args": { + "External id": 983738,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939336623.935, "dur": 25101.157, + "args": { + "External id": 983739,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939336631.215, "dur": 5.426, + "args": { + "External id": 983740,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939336638.584, "dur": 25079.734, + "args": { + "External id": 983741,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939362097.440, "dur": 43.024, + "args": { + "External id": 983742,"Sequence number": 10552474, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16642 + } + }, + { + "ph": "s", "id": 445, "pid": 2338708, "tid": 2338708, "ts": 6345939362097.440, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939362118.632, "dur": 16.786, + "args": { + "External id": 983743,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939362127.444, "dur": 7.587, + "args": { + "External id": 983744,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939362221.142, "dur": 94.977, + "args": { + "External id": 983745,"Record function id": 0, "Ev Idx": 16645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939362317.813, "dur": 1177.707, + "args": { + "External id": 983746,"Record function id": 0, "Ev Idx": 16646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939362360.425, "dur": 1120.901, + "args": { + "External id": 983747,"Sequence number": 10552475, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16647 + } + }, + { + "ph": "s", "id": 444, "pid": 2338708, "tid": 2338708, "ts": 6345939362360.425, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939362435.074, "dur": 54.291, + "args": { + "External id": 983748,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939362503.467, "dur": 108.221, + "args": { + "External id": 983749,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939362626.406, "dur": 37.048, + "args": { + "External id": 983750,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939362670.826, "dur": 29.766, + "args": { + "External id": 983751,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939362727.982, "dur": 31.951, + "args": { + "External id": 983752,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939362780.876, "dur": 20.801, + "args": { + "External id": 983753,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939362826.173, "dur": 145.082, + "args": { + "External id": 983754,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939362881.542, "dur": 14.910, + "args": { + "External id": 983755,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939362887.472, "dur": 8.043, + "args": { + "External id": 983756,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939362899.433, "dur": 4.992, + "args": { + "External id": 983757,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939362905.782, "dur": 1.032, + "args": { + "External id": 983758,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939362911.542, "dur": 5.850, + "args": { + "External id": 983759,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939362982.169, "dur": 68.114, + "args": { + "External id": 983760,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939363125.418, "dur": 31.526, + "args": { + "External id": 983761,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939363169.343, "dur": 47.594, + "args": { + "External id": 983762,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939363224.446, "dur": 35.244, + "args": { + "External id": 983763,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939363285.721, "dur": 31.803, + "args": { + "External id": 983764,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939363324.405, "dur": 35.442, + "args": { + "External id": 983765,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939363382.427, "dur": 19.206, + "args": { + "External id": 983766,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16666 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2338708, "tid": 2338708, + "ts": 6345939363563.963, "dur": 91.201, + "args": { + "External id": 983767,"Record function id": 0, "Ev Idx": 16667 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939363733.766, "dur": 46.822, + "args": { + "External id": 983768,"Record function id": 0, "Ev Idx": 16668 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2338708, "tid": 2338708, + "ts": 6345939363790.056, "dur": 28132.940, + "args": { + "External id": 983769,"Record function id": 0, "Ev Idx": 16669 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2338708, "tid": 2338708, + "ts": 6345939363800.071, "dur": 1001.571, + "args": { + "External id": 983770,"Record function id": 0, "Ev Idx": 16670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939363890.958, "dur": 9.131, + "args": { + "External id": 983771,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939363913.275, "dur": 38.533, + "args": { + "External id": 983772,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939363918.719, "dur": 2.310, + "args": { + "External id": 983773,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939363925.892, "dur": 0.556, + "args": { + "External id": 983774,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939363927.455, "dur": 0.354, + "args": { + "External id": 983775,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939363928.519, "dur": 0.456, + "args": { + "External id": 983776,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939363932.388, "dur": 0.522, + "args": { + "External id": 983777,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939363933.740, "dur": 0.409, + "args": { + "External id": 983778,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939363935.320, "dur": 4.316, + "args": { + "External id": 983779,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939363940.518, "dur": 0.714, + "args": { + "External id": 983780,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939363942.129, "dur": 0.466, + "args": { + "External id": 983781,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939363965.919, "dur": 78.911, + "args": { + "External id": 983782,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939364122.450, "dur": 143.545, + "args": { + "External id": 983783,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939364134.733, "dur": 6.480, + "args": { + "External id": 983784,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939364147.088, "dur": 11.424, + "args": { + "External id": 983785,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939364151.735, "dur": 6.361, + "args": { + "External id": 983786,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939364155.265, "dur": 0.879, + "args": { + "External id": 983787,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939364166.558, "dur": 36.195, + "args": { + "External id": 983788,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939364168.586, "dur": 3.346, + "args": { + "External id": 983789,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939364173.016, "dur": 0.459, + "args": { + "External id": 983790,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939364174.120, "dur": 0.346, + "args": { + "External id": 983791,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939364177.809, "dur": 3.564, + "args": { + "External id": 983792,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939364182.205, "dur": 0.454, + "args": { + "External id": 983793,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939364183.313, "dur": 0.551, + "args": { + "External id": 983794,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939364187.355, "dur": 0.292, + "args": { + "External id": 983795,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939364189.786, "dur": 0.596, + "args": { + "External id": 983796,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939364192.666, "dur": 2.692, + "args": { + "External id": 983797,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939364220.972, "dur": 36.024, + "args": { + "External id": 983798,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939364325.431, "dur": 378.090, + "args": { + "External id": 983799,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939364358.081, "dur": 340.507, + "args": { + "External id": 983800,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16700, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939364372.310, "dur": 320.651, + "args": { + "External id": 983801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939364729.935, "dur": 2.105, + "args": { + "External id": 983802,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16702, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2338708, "tid": 2338708, + "ts": 6345939364821.883, "dur": 26868.787, + "args": { + "External id": 983803,"Record function id": 0, "Ev Idx": 16703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939364935.265, "dur": 5.750, + "args": { + "External id": 983804,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939364944.491, "dur": 1.274, + "args": { + "External id": 983805,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939364947.305, "dur": 3.803, + "args": { + "External id": 983806,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939364952.973, "dur": 0.902, + "args": { + "External id": 983807,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939364955.202, "dur": 0.741, + "args": { + "External id": 983808,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939364957.415, "dur": 1.146, + "args": { + "External id": 983809,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939364963.580, "dur": 0.981, + "args": { + "External id": 983810,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939364965.998, "dur": 3.424, + "args": { + "External id": 983811,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939364970.791, "dur": 0.849, + "args": { + "External id": 983812,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939364973.056, "dur": 0.738, + "args": { + "External id": 983813,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939364993.552, "dur": 26632.790, + "args": { + "External id": 983814,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939365029.656, "dur": 26584.532, + "args": { + "External id": 983815,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939365049.722, "dur": 54.240, + "args": { + "External id": 983816,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939365109.436, "dur": 26459.996, + "args": { + "External id": 983817,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939365112.238, "dur": 26456.086, + "args": { + "External id": 983818,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939365118.774, "dur": 6.715, + "args": { + "External id": 983819,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939365127.396, "dur": 26436.207, + "args": { + "External id": 983820,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939391860.499, "dur": 36.728, + "args": { + "External id": 983821,"Sequence number": 10552476, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16721 + } + }, + { + "ph": "s", "id": 443, "pid": 2338708, "tid": 2338708, "ts": 6345939391860.499, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939391880.391, "dur": 11.885, + "args": { + "External id": 983822,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939391885.442, "dur": 6.653, + "args": { + "External id": 983823,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939391967.769, "dur": 128.569, + "args": { + "External id": 983824,"Record function id": 0, "Ev Idx": 16724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939392099.520, "dur": 1165.564, + "args": { + "External id": 983825,"Record function id": 0, "Ev Idx": 16725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939392145.098, "dur": 1105.418, + "args": { + "External id": 983826,"Sequence number": 10552477, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16726 + } + }, + { + "ph": "s", "id": 442, "pid": 2338708, "tid": 2338708, "ts": 6345939392145.098, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939392220.112, "dur": 52.947, + "args": { + "External id": 983827,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939392287.272, "dur": 108.421, + "args": { + "External id": 983828,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939392407.304, "dur": 39.721, + "args": { + "External id": 983829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939392456.721, "dur": 29.451, + "args": { + "External id": 983830,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939392513.473, "dur": 28.801, + "args": { + "External id": 983831,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939392563.686, "dur": 18.474, + "args": { + "External id": 983832,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939392607.279, "dur": 140.799, + "args": { + "External id": 983833,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939392662.686, "dur": 12.471, + "args": { + "External id": 983834,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939392668.444, "dur": 5.956, + "args": { + "External id": 983835,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939392677.731, "dur": 4.821, + "args": { + "External id": 983836,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939392683.636, "dur": 0.877, + "args": { + "External id": 983837,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939392687.292, "dur": 8.732, + "args": { + "External id": 983838,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939392759.746, "dur": 47.797, + "args": { + "External id": 983839,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939392838.454, "dur": 29.587, + "args": { + "External id": 983840,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939392878.098, "dur": 41.243, + "args": { + "External id": 983841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939392928.521, "dur": 34.520, + "args": { + "External id": 983842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939392988.180, "dur": 49.258, + "args": { + "External id": 983843,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939393048.781, "dur": 78.346, + "args": { + "External id": 983844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939393152.525, "dur": 19.610, + "args": { + "External id": 983845,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16745 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2338708, "tid": 2338708, + "ts": 6345939393334.196, "dur": 91.980, + "args": { + "External id": 983846,"Record function id": 0, "Ev Idx": 16746 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939393505.360, "dur": 48.725, + "args": { + "External id": 983847,"Record function id": 0, "Ev Idx": 16747 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2338708, "tid": 2338708, + "ts": 6345939393563.058, "dur": 30005.866, + "args": { + "External id": 983848,"Record function id": 0, "Ev Idx": 16748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2338708, "tid": 2338708, + "ts": 6345939393571.376, "dur": 912.811, + "args": { + "External id": 983849,"Record function id": 0, "Ev Idx": 16749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939393662.986, "dur": 9.347, + "args": { + "External id": 983850,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939393686.243, "dur": 41.589, + "args": { + "External id": 983851,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393691.690, "dur": 2.225, + "args": { + "External id": 983852,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393698.716, "dur": 0.364, + "args": { + "External id": 983853,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393700.155, "dur": 0.355, + "args": { + "External id": 983854,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393701.856, "dur": 0.439, + "args": { + "External id": 983855,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393705.565, "dur": 0.464, + "args": { + "External id": 983856,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393707.582, "dur": 0.786, + "args": { + "External id": 983857,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393709.435, "dur": 4.770, + "args": { + "External id": 983858,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393715.919, "dur": 0.357, + "args": { + "External id": 983859,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393717.189, "dur": 0.492, + "args": { + "External id": 983860,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939393741.856, "dur": 50.601, + "args": { + "External id": 983861,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939393824.436, "dur": 128.311, + "args": { + "External id": 983862,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939393834.874, "dur": 4.046, + "args": { + "External id": 983863,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939393844.162, "dur": 10.569, + "args": { + "External id": 983864,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939393848.941, "dur": 5.374, + "args": { + "External id": 983865,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393852.334, "dur": 0.785, + "args": { + "External id": 983866,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939393861.226, "dur": 36.802, + "args": { + "External id": 983867,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393863.371, "dur": 2.962, + "args": { + "External id": 983868,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393867.557, "dur": 0.407, + "args": { + "External id": 983869,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393869.804, "dur": 0.573, + "args": { + "External id": 983870,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393874.030, "dur": 2.734, + "args": { + "External id": 983871,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393878.458, "dur": 0.536, + "args": { + "External id": 983872,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393880.307, "dur": 0.482, + "args": { + "External id": 983873,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393884.926, "dur": 0.458, + "args": { + "External id": 983874,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393886.528, "dur": 0.283, + "args": { + "External id": 983875,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939393888.543, "dur": 3.138, + "args": { + "External id": 983876,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939393911.832, "dur": 32.929, + "args": { + "External id": 983877,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939394007.125, "dur": 371.827, + "args": { + "External id": 983878,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939394104.650, "dur": 269.264, + "args": { + "External id": 983879,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16779, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939394117.278, "dur": 248.953, + "args": { + "External id": 983880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939394405.959, "dur": 2.964, + "args": { + "External id": 983881,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16781, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2338708, "tid": 2338708, + "ts": 6345939394505.437, "dur": 28839.614, + "args": { + "External id": 983882,"Record function id": 0, "Ev Idx": 16782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939394613.779, "dur": 7.124, + "args": { + "External id": 983883,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939394624.237, "dur": 1.020, + "args": { + "External id": 983884,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939394626.937, "dur": 3.789, + "args": { + "External id": 983885,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939394632.531, "dur": 0.903, + "args": { + "External id": 983886,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939394635.210, "dur": 1.069, + "args": { + "External id": 983887,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939394637.592, "dur": 1.140, + "args": { + "External id": 983888,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939394642.558, "dur": 1.312, + "args": { + "External id": 983889,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939394645.406, "dur": 2.887, + "args": { + "External id": 983890,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939394649.814, "dur": 0.820, + "args": { + "External id": 983891,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939394651.939, "dur": 0.668, + "args": { + "External id": 983892,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939394674.231, "dur": 28612.372, + "args": { + "External id": 983893,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939394690.079, "dur": 28584.940, + "args": { + "External id": 983894,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939394708.547, "dur": 17.059, + "args": { + "External id": 983895,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939394729.454, "dur": 28504.754, + "args": { + "External id": 983896,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939394732.179, "dur": 28501.106, + "args": { + "External id": 983897,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939394739.840, "dur": 5.234, + "args": { + "External id": 983898,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939394746.703, "dur": 28482.318, + "args": { + "External id": 983899,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939423507.228, "dur": 35.358, + "args": { + "External id": 983900,"Sequence number": 10552478, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16800 + } + }, + { + "ph": "s", "id": 441, "pid": 2338708, "tid": 2338708, "ts": 6345939423507.228, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939423525.898, "dur": 11.756, + "args": { + "External id": 983901,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939423530.752, "dur": 6.662, + "args": { + "External id": 983902,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939423610.998, "dur": 78.414, + "args": { + "External id": 983903,"Record function id": 0, "Ev Idx": 16803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939423690.699, "dur": 1154.317, + "args": { + "External id": 983904,"Record function id": 0, "Ev Idx": 16804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939423732.403, "dur": 1098.643, + "args": { + "External id": 983905,"Sequence number": 10552479, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16805 + } + }, + { + "ph": "s", "id": 440, "pid": 2338708, "tid": 2338708, "ts": 6345939423732.403, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939423802.128, "dur": 50.495, + "args": { + "External id": 983906,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939423870.256, "dur": 104.675, + "args": { + "External id": 983907,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939423986.701, "dur": 59.348, + "args": { + "External id": 983908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939424094.457, "dur": 36.188, + "args": { + "External id": 983909,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939424164.915, "dur": 28.742, + "args": { + "External id": 983910,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939424215.029, "dur": 15.726, + "args": { + "External id": 983911,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939424254.484, "dur": 137.291, + "args": { + "External id": 983912,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939424308.953, "dur": 12.453, + "args": { + "External id": 983913,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939424314.722, "dur": 5.833, + "args": { + "External id": 983914,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939424324.096, "dur": 4.497, + "args": { + "External id": 983915,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939424329.799, "dur": 0.968, + "args": { + "External id": 983916,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939424333.215, "dur": 5.604, + "args": { + "External id": 983917,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939424403.883, "dur": 49.771, + "args": { + "External id": 983918,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939424490.470, "dur": 32.243, + "args": { + "External id": 983919,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939424533.375, "dur": 42.387, + "args": { + "External id": 983920,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939424585.667, "dur": 35.605, + "args": { + "External id": 983921,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939424643.269, "dur": 28.651, + "args": { + "External id": 983922,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939424680.768, "dur": 35.963, + "args": { + "External id": 983923,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939424737.255, "dur": 20.105, + "args": { + "External id": 983924,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16824 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2338708, "tid": 2338708, + "ts": 6345939424912.059, "dur": 85.792, + "args": { + "External id": 983925,"Record function id": 0, "Ev Idx": 16825 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939425132.003, "dur": 49.081, + "args": { + "External id": 983926,"Record function id": 0, "Ev Idx": 16826 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2338708, "tid": 2338708, + "ts": 6345939425191.449, "dur": 29905.485, + "args": { + "External id": 983927,"Record function id": 0, "Ev Idx": 16827 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2338708, "tid": 2338708, + "ts": 6345939425202.177, "dur": 987.754, + "args": { + "External id": 983928,"Record function id": 0, "Ev Idx": 16828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939425290.461, "dur": 9.666, + "args": { + "External id": 983929,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939425314.775, "dur": 40.043, + "args": { + "External id": 983930,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425321.080, "dur": 2.266, + "args": { + "External id": 983931,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425328.022, "dur": 0.635, + "args": { + "External id": 983932,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425330.174, "dur": 0.326, + "args": { + "External id": 983933,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425331.833, "dur": 0.321, + "args": { + "External id": 983934,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425336.344, "dur": 0.465, + "args": { + "External id": 983935,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425337.947, "dur": 0.544, + "args": { + "External id": 983936,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425340.267, "dur": 3.415, + "args": { + "External id": 983937,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425344.692, "dur": 0.559, + "args": { + "External id": 983938,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425346.629, "dur": 0.562, + "args": { + "External id": 983939,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939425368.607, "dur": 58.293, + "args": { + "External id": 983940,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939425463.507, "dur": 125.895, + "args": { + "External id": 983941,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939425475.053, "dur": 6.286, + "args": { + "External id": 983942,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939425486.643, "dur": 10.667, + "args": { + "External id": 983943,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939425491.391, "dur": 5.263, + "args": { + "External id": 983944,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425494.898, "dur": 0.531, + "args": { + "External id": 983945,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939425504.423, "dur": 32.628, + "args": { + "External id": 983946,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425506.614, "dur": 0.655, + "args": { + "External id": 983947,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425509.139, "dur": 2.802, + "args": { + "External id": 983948,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425513.388, "dur": 0.387, + "args": { + "External id": 983949,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425515.810, "dur": 2.872, + "args": { + "External id": 983950,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425522.164, "dur": 0.372, + "args": { + "External id": 983951,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425523.717, "dur": 0.397, + "args": { + "External id": 983952,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425525.312, "dur": 0.413, + "args": { + "External id": 983953,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425529.117, "dur": 0.430, + "args": { + "External id": 983954,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939425530.314, "dur": 0.630, + "args": { + "External id": 983955,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939425548.974, "dur": 31.854, + "args": { + "External id": 983956,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939425643.958, "dur": 393.606, + "args": { + "External id": 983957,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939425678.922, "dur": 352.147, + "args": { + "External id": 983958,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16858, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939425688.892, "dur": 315.132, + "args": { + "External id": 983959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939426100.730, "dur": 4.069, + "args": { + "External id": 983960,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16860, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2338708, "tid": 2338708, + "ts": 6345939426213.944, "dur": 28618.893, + "args": { + "External id": 983961,"Record function id": 0, "Ev Idx": 16861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939426332.492, "dur": 7.016, + "args": { + "External id": 983962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939426343.096, "dur": 1.397, + "args": { + "External id": 983963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939426346.057, "dur": 3.781, + "args": { + "External id": 983964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939426351.884, "dur": 0.947, + "args": { + "External id": 983965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939426354.203, "dur": 1.211, + "args": { + "External id": 983966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939426356.773, "dur": 0.874, + "args": { + "External id": 983967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939426361.249, "dur": 1.301, + "args": { + "External id": 983968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939426364.085, "dur": 2.669, + "args": { + "External id": 983969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939426368.120, "dur": 0.896, + "args": { + "External id": 983970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939426370.963, "dur": 0.708, + "args": { + "External id": 983971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939426393.093, "dur": 28387.685, + "args": { + "External id": 983972,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939426409.276, "dur": 28362.142, + "args": { + "External id": 983973,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939426426.831, "dur": 18.514, + "args": { + "External id": 983974,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939426449.228, "dur": 28281.488, + "args": { + "External id": 983975,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939426451.998, "dur": 28277.868, + "args": { + "External id": 983976,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939426458.468, "dur": 5.734, + "args": { + "External id": 983977,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939426466.048, "dur": 28259.910, + "args": { + "External id": 983978,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939454991.136, "dur": 46.381, + "args": { + "External id": 983979,"Sequence number": 10552480, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16879 + } + }, + { + "ph": "s", "id": 439, "pid": 2338708, "tid": 2338708, "ts": 6345939454991.136, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939455020.908, "dur": 10.925, + "args": { + "External id": 983980,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939455025.606, "dur": 5.855, + "args": { + "External id": 983981,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939455142.918, "dur": 81.263, + "args": { + "External id": 983982,"Record function id": 0, "Ev Idx": 16882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939455225.903, "dur": 1171.234, + "args": { + "External id": 983983,"Record function id": 0, "Ev Idx": 16883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939455269.294, "dur": 1113.305, + "args": { + "External id": 983984,"Sequence number": 10552481, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16884 + } + }, + { + "ph": "s", "id": 438, "pid": 2338708, "tid": 2338708, "ts": 6345939455269.294, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939455344.110, "dur": 52.497, + "args": { + "External id": 983985,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939455410.552, "dur": 107.218, + "args": { + "External id": 983986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939455529.739, "dur": 39.739, + "args": { + "External id": 983987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939455579.812, "dur": 29.149, + "args": { + "External id": 983988,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939455637.299, "dur": 32.159, + "args": { + "External id": 983989,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939455690.249, "dur": 19.999, + "args": { + "External id": 983990,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939455733.043, "dur": 135.443, + "args": { + "External id": 983991,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939455784.064, "dur": 11.969, + "args": { + "External id": 983992,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939455789.756, "dur": 5.498, + "args": { + "External id": 983993,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939455798.945, "dur": 4.274, + "args": { + "External id": 983994,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939455804.680, "dur": 0.902, + "args": { + "External id": 983995,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939455808.310, "dur": 8.544, + "args": { + "External id": 983996,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939455880.373, "dur": 45.777, + "args": { + "External id": 983997,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939455958.896, "dur": 31.331, + "args": { + "External id": 983998,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939456001.370, "dur": 98.879, + "args": { + "External id": 983999,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939456114.845, "dur": 39.282, + "args": { + "External id": 984000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939456181.934, "dur": 32.945, + "args": { + "External id": 984001,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939456224.337, "dur": 35.655, + "args": { + "External id": 984002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939456279.401, "dur": 25.067, + "args": { + "External id": 984003,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16903 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2338708, "tid": 2338708, + "ts": 6345939456464.693, "dur": 84.458, + "args": { + "External id": 984004,"Record function id": 0, "Ev Idx": 16904 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939456626.284, "dur": 46.544, + "args": { + "External id": 984005,"Record function id": 0, "Ev Idx": 16905 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2338708, "tid": 2338708, + "ts": 6345939456682.233, "dur": 30181.937, + "args": { + "External id": 984006,"Record function id": 0, "Ev Idx": 16906 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2338708, "tid": 2338708, + "ts": 6345939456689.612, "dur": 987.463, + "args": { + "External id": 984007,"Record function id": 0, "Ev Idx": 16907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939456779.446, "dur": 8.450, + "args": { + "External id": 984008,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939456801.623, "dur": 44.143, + "args": { + "External id": 984009,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456807.161, "dur": 2.269, + "args": { + "External id": 984010,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456814.835, "dur": 0.468, + "args": { + "External id": 984011,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456816.564, "dur": 0.479, + "args": { + "External id": 984012,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456818.976, "dur": 0.488, + "args": { + "External id": 984013,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456822.483, "dur": 0.519, + "args": { + "External id": 984014,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456824.223, "dur": 0.413, + "args": { + "External id": 984015,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456825.904, "dur": 3.102, + "args": { + "External id": 984016,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456831.031, "dur": 0.412, + "args": { + "External id": 984017,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456837.417, "dur": 0.356, + "args": { + "External id": 984018,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939456859.303, "dur": 56.352, + "args": { + "External id": 984019,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939456949.388, "dur": 197.858, + "args": { + "External id": 984020,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939456960.714, "dur": 4.123, + "args": { + "External id": 984021,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939456970.017, "dur": 12.606, + "args": { + "External id": 984022,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939456974.443, "dur": 7.754, + "args": { + "External id": 984023,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 16923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456977.937, "dur": 2.909, + "args": { + "External id": 984024,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 16924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939456989.560, "dur": 54.362, + "args": { + "External id": 984025,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456991.726, "dur": 0.473, + "args": { + "External id": 984026,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456993.868, "dur": 0.725, + "args": { + "External id": 984027,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939456995.780, "dur": 0.406, + "args": { + "External id": 984028,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939457000.143, "dur": 2.770, + "args": { + "External id": 984029,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939457004.092, "dur": 0.614, + "args": { + "External id": 984030,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939457006.037, "dur": 22.169, + "args": { + "External id": 984031,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939457030.792, "dur": 0.386, + "args": { + "External id": 984032,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939457032.577, "dur": 0.426, + "args": { + "External id": 984033,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939457037.357, "dur": 0.433, + "args": { + "External id": 984034,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939457098.619, "dur": 37.874, + "args": { + "External id": 984035,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939457208.042, "dur": 374.227, + "args": { + "External id": 984036,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 16936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939457241.157, "dur": 336.094, + "args": { + "External id": 984037,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 16937, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939457251.644, "dur": 320.523, + "args": { + "External id": 984038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 16938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939457605.106, "dur": 2.149, + "args": { + "External id": 984039,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 16939, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2338708, "tid": 2338708, + "ts": 6345939457696.805, "dur": 28932.549, + "args": { + "External id": 984040,"Record function id": 0, "Ev Idx": 16940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939457804.200, "dur": 6.580, + "args": { + "External id": 984041,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 16941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939457813.917, "dur": 0.978, + "args": { + "External id": 984042,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939457816.577, "dur": 3.315, + "args": { + "External id": 984043,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939457821.797, "dur": 1.143, + "args": { + "External id": 984044,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939457824.399, "dur": 0.904, + "args": { + "External id": 984045,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 16945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939457828.912, "dur": 0.929, + "args": { + "External id": 984046,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 16946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939457831.388, "dur": 1.094, + "args": { + "External id": 984047,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 16947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939457834.172, "dur": 3.048, + "args": { + "External id": 984048,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939457838.427, "dur": 0.787, + "args": { + "External id": 984049,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939457843.437, "dur": 0.896, + "args": { + "External id": 984050,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 16950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939457862.064, "dur": 28706.040, + "args": { + "External id": 984051,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939457876.735, "dur": 28679.482, + "args": { + "External id": 984052,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 16952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939457892.946, "dur": 18.658, + "args": { + "External id": 984053,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939457915.177, "dur": 28596.503, + "args": { + "External id": 984054,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 16954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939457917.787, "dur": 28592.795, + "args": { + "External id": 984055,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 16955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939457923.840, "dur": 6.108, + "args": { + "External id": 984056,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939457932.062, "dur": 28573.782, + "args": { + "External id": 984057,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 16957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939486793.075, "dur": 38.337, + "args": { + "External id": 984058,"Sequence number": 10552482, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 16958 + } + }, + { + "ph": "s", "id": 437, "pid": 2338708, "tid": 2338708, "ts": 6345939486793.075, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939486814.578, "dur": 11.806, + "args": { + "External id": 984059,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 16959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939486819.713, "dur": 6.434, + "args": { + "External id": 984060,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 16960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939486908.730, "dur": 76.039, + "args": { + "External id": 984061,"Record function id": 0, "Ev Idx": 16961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939486986.102, "dur": 1214.407, + "args": { + "External id": 984062,"Record function id": 0, "Ev Idx": 16962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939487047.113, "dur": 1138.286, + "args": { + "External id": 984063,"Sequence number": 10552483, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 16963 + } + }, + { + "ph": "s", "id": 436, "pid": 2338708, "tid": 2338708, "ts": 6345939487047.113, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939487151.680, "dur": 55.117, + "args": { + "External id": 984064,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939487238.809, "dur": 105.143, + "args": { + "External id": 984065,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939487356.994, "dur": 36.298, + "args": { + "External id": 984066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939487402.164, "dur": 28.854, + "args": { + "External id": 984067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 16967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939487460.320, "dur": 27.827, + "args": { + "External id": 984068,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939487510.083, "dur": 18.480, + "args": { + "External id": 984069,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 16969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939487552.380, "dur": 139.912, + "args": { + "External id": 984070,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 16970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939487606.229, "dur": 11.675, + "args": { + "External id": 984071,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 16971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939487612.055, "dur": 5.099, + "args": { + "External id": 984072,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939487620.740, "dur": 5.395, + "args": { + "External id": 984073,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939487627.236, "dur": 1.168, + "args": { + "External id": 984074,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939487631.105, "dur": 8.495, + "args": { + "External id": 984075,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939487703.438, "dur": 48.658, + "args": { + "External id": 984076,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 16976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939487785.328, "dur": 31.373, + "args": { + "External id": 984077,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 16977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939487827.306, "dur": 40.657, + "args": { + "External id": 984078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939487874.920, "dur": 35.082, + "args": { + "External id": 984079,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 16979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939487934.503, "dur": 29.464, + "args": { + "External id": 984080,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 16980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939487971.761, "dur": 53.817, + "args": { + "External id": 984081,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 16981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939488049.065, "dur": 57.647, + "args": { + "External id": 984082,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 16982 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2338708, "tid": 2338708, + "ts": 6345939488272.259, "dur": 82.757, + "args": { + "External id": 984083,"Record function id": 0, "Ev Idx": 16983 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939488434.206, "dur": 45.063, + "args": { + "External id": 984084,"Record function id": 0, "Ev Idx": 16984 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2338708, "tid": 2338708, + "ts": 6345939488489.347, "dur": 30000.928, + "args": { + "External id": 984085,"Record function id": 0, "Ev Idx": 16985 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2338708, "tid": 2338708, + "ts": 6345939488497.245, "dur": 1032.296, + "args": { + "External id": 984086,"Record function id": 0, "Ev Idx": 16986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939488582.514, "dur": 8.926, + "args": { + "External id": 984087,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 16987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939488606.345, "dur": 46.800, + "args": { + "External id": 984088,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 16988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488611.780, "dur": 2.389, + "args": { + "External id": 984089,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488619.480, "dur": 0.446, + "args": { + "External id": 984090,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488621.120, "dur": 0.360, + "args": { + "External id": 984091,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488623.469, "dur": 0.631, + "args": { + "External id": 984092,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488627.614, "dur": 0.641, + "args": { + "External id": 984093,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488633.027, "dur": 0.349, + "args": { + "External id": 984094,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488635.221, "dur": 5.380, + "args": { + "External id": 984095,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488642.432, "dur": 0.418, + "args": { + "External id": 984096,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488643.923, "dur": 0.404, + "args": { + "External id": 984097,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 16997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939488665.690, "dur": 53.549, + "args": { + "External id": 984098,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 16998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939488752.507, "dur": 132.615, + "args": { + "External id": 984099,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 16999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939488763.196, "dur": 3.433, + "args": { + "External id": 984100,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939488772.327, "dur": 11.589, + "args": { + "External id": 984101,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939488777.141, "dur": 6.375, + "args": { + "External id": 984102,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488781.446, "dur": 0.734, + "args": { + "External id": 984103,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939488791.080, "dur": 36.289, + "args": { + "External id": 984104,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488793.552, "dur": 2.817, + "args": { + "External id": 984105,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488798.030, "dur": 0.640, + "args": { + "External id": 984106,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488799.922, "dur": 0.364, + "args": { + "External id": 984107,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488804.947, "dur": 2.746, + "args": { + "External id": 984108,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488808.802, "dur": 0.329, + "args": { + "External id": 984109,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488811.274, "dur": 0.527, + "args": { + "External id": 984110,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488814.806, "dur": 0.353, + "args": { + "External id": 984111,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488816.806, "dur": 0.323, + "args": { + "External id": 984112,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939488818.969, "dur": 2.614, + "args": { + "External id": 984113,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939488841.282, "dur": 34.779, + "args": { + "External id": 984114,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939488938.030, "dur": 477.759, + "args": { + "External id": 984115,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939488970.176, "dur": 439.418, + "args": { + "External id": 984116,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17016, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939488981.056, "dur": 422.163, + "args": { + "External id": 984117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939489445.338, "dur": 2.450, + "args": { + "External id": 984118,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17018, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2338708, "tid": 2338708, + "ts": 6345939489550.994, "dur": 28719.981, + "args": { + "External id": 984119,"Record function id": 0, "Ev Idx": 17019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939489666.695, "dur": 6.988, + "args": { + "External id": 984120,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939489677.490, "dur": 1.055, + "args": { + "External id": 984121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939489680.088, "dur": 3.649, + "args": { + "External id": 984122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939489685.478, "dur": 1.000, + "args": { + "External id": 984123,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939489687.769, "dur": 0.971, + "args": { + "External id": 984124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939489690.010, "dur": 0.858, + "args": { + "External id": 984125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939489694.637, "dur": 0.880, + "args": { + "External id": 984126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939489697.038, "dur": 3.046, + "args": { + "External id": 984127,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939489701.557, "dur": 0.840, + "args": { + "External id": 984128,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939489703.915, "dur": 0.762, + "args": { + "External id": 984129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939489726.973, "dur": 28492.795, + "args": { + "External id": 984130,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939489742.594, "dur": 28468.550, + "args": { + "External id": 984131,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939489759.111, "dur": 16.834, + "args": { + "External id": 984132,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939489779.706, "dur": 28393.395, + "args": { + "External id": 984133,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939489782.170, "dur": 28390.051, + "args": { + "External id": 984134,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939489788.737, "dur": 8.460, + "args": { + "External id": 984135,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939489799.172, "dur": 28369.212, + "args": { + "External id": 984136,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939518427.482, "dur": 36.139, + "args": { + "External id": 984137,"Sequence number": 10552484, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17037 + } + }, + { + "ph": "s", "id": 435, "pid": 2338708, "tid": 2338708, "ts": 6345939518427.482, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939518448.281, "dur": 10.071, + "args": { + "External id": 984138,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939518452.683, "dur": 5.446, + "args": { + "External id": 984139,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939518532.807, "dur": 78.416, + "args": { + "External id": 984140,"Record function id": 0, "Ev Idx": 17040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939518612.548, "dur": 1141.548, + "args": { + "External id": 984141,"Record function id": 0, "Ev Idx": 17041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939518652.542, "dur": 1088.253, + "args": { + "External id": 984142,"Sequence number": 10552485, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17042 + } + }, + { + "ph": "s", "id": 434, "pid": 2338708, "tid": 2338708, "ts": 6345939518652.542, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939518719.619, "dur": 48.301, + "args": { + "External id": 984143,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939518780.799, "dur": 108.041, + "args": { + "External id": 984144,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939518901.264, "dur": 39.812, + "args": { + "External id": 984145,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939518951.573, "dur": 29.640, + "args": { + "External id": 984146,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939519025.377, "dur": 65.168, + "args": { + "External id": 984147,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939519117.028, "dur": 22.664, + "args": { + "External id": 984148,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939519164.820, "dur": 137.054, + "args": { + "External id": 984149,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939519217.491, "dur": 11.777, + "args": { + "External id": 984150,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939519223.275, "dur": 5.159, + "args": { + "External id": 984151,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939519232.246, "dur": 4.460, + "args": { + "External id": 984152,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939519237.888, "dur": 1.127, + "args": { + "External id": 984153,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939519241.875, "dur": 6.196, + "args": { + "External id": 984154,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939519313.526, "dur": 54.078, + "args": { + "External id": 984155,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939519402.855, "dur": 32.813, + "args": { + "External id": 984156,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939519445.571, "dur": 42.200, + "args": { + "External id": 984157,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939519497.101, "dur": 34.743, + "args": { + "External id": 984158,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939519554.488, "dur": 28.866, + "args": { + "External id": 984159,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939519592.306, "dur": 35.455, + "args": { + "External id": 984160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939519649.211, "dur": 18.020, + "args": { + "External id": 984161,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17061 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2338708, "tid": 2338708, + "ts": 6345939519822.061, "dur": 83.528, + "args": { + "External id": 984162,"Record function id": 0, "Ev Idx": 17062 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939519982.882, "dur": 105.908, + "args": { + "External id": 984163,"Record function id": 0, "Ev Idx": 17063 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2338708, "tid": 2338708, + "ts": 6345939520102.779, "dur": 30595.035, + "args": { + "External id": 984164,"Record function id": 0, "Ev Idx": 17064 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2338708, "tid": 2338708, + "ts": 6345939520113.402, "dur": 993.104, + "args": { + "External id": 984165,"Record function id": 0, "Ev Idx": 17065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939520203.338, "dur": 10.560, + "args": { + "External id": 984166,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939520227.801, "dur": 43.479, + "args": { + "External id": 984167,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520233.154, "dur": 2.641, + "args": { + "External id": 984168,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520240.577, "dur": 0.517, + "args": { + "External id": 984169,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520242.829, "dur": 0.444, + "args": { + "External id": 984170,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520244.964, "dur": 0.591, + "args": { + "External id": 984171,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520249.064, "dur": 0.892, + "args": { + "External id": 984172,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520251.459, "dur": 0.373, + "args": { + "External id": 984173,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520253.205, "dur": 4.925, + "args": { + "External id": 984174,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520260.107, "dur": 0.519, + "args": { + "External id": 984175,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520261.867, "dur": 0.463, + "args": { + "External id": 984176,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939520284.376, "dur": 60.057, + "args": { + "External id": 984177,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939520386.012, "dur": 141.546, + "args": { + "External id": 984178,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939520408.413, "dur": 5.833, + "args": { + "External id": 984179,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939520419.604, "dur": 10.581, + "args": { + "External id": 984180,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939520424.152, "dur": 5.639, + "args": { + "External id": 984181,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520427.793, "dur": 0.736, + "args": { + "External id": 984182,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939520436.571, "dur": 33.280, + "args": { + "External id": 984183,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520438.624, "dur": 0.618, + "args": { + "External id": 984184,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520441.126, "dur": 2.753, + "args": { + "External id": 984185,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520444.886, "dur": 0.477, + "args": { + "External id": 984186,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520447.036, "dur": 3.047, + "args": { + "External id": 984187,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520454.274, "dur": 0.387, + "args": { + "External id": 984188,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520456.203, "dur": 0.360, + "args": { + "External id": 984189,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520457.599, "dur": 0.260, + "args": { + "External id": 984190,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520462.068, "dur": 0.390, + "args": { + "External id": 984191,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939520463.631, "dur": 0.356, + "args": { + "External id": 984192,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939520485.558, "dur": 33.629, + "args": { + "External id": 984193,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939520583.325, "dur": 371.478, + "args": { + "External id": 984194,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939520614.027, "dur": 335.660, + "args": { + "External id": 984195,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17095, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939520624.980, "dur": 318.746, + "args": { + "External id": 984196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939520976.937, "dur": 2.407, + "args": { + "External id": 984197,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17097, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2338708, "tid": 2338708, + "ts": 6345939521131.069, "dur": 29343.519, + "args": { + "External id": 984198,"Record function id": 0, "Ev Idx": 17098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939521247.631, "dur": 6.996, + "args": { + "External id": 984199,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939521258.220, "dur": 1.211, + "args": { + "External id": 984200,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939521261.255, "dur": 4.078, + "args": { + "External id": 984201,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939521266.930, "dur": 1.317, + "args": { + "External id": 984202,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939521269.593, "dur": 1.090, + "args": { + "External id": 984203,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939521271.779, "dur": 1.487, + "args": { + "External id": 984204,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939521276.993, "dur": 1.019, + "args": { + "External id": 984205,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939521279.622, "dur": 3.151, + "args": { + "External id": 984206,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939521284.135, "dur": 0.764, + "args": { + "External id": 984207,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939521286.383, "dur": 0.891, + "args": { + "External id": 984208,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939521307.960, "dur": 29109.093, + "args": { + "External id": 984209,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939521323.804, "dur": 29082.806, + "args": { + "External id": 984210,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939521340.312, "dur": 16.447, + "args": { + "External id": 984211,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939521360.280, "dur": 29005.194, + "args": { + "External id": 984212,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939521362.676, "dur": 29001.710, + "args": { + "External id": 984213,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939521368.652, "dur": 8.420, + "args": { + "External id": 984214,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939521379.022, "dur": 28980.742, + "args": { + "External id": 984215,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939550635.377, "dur": 36.124, + "args": { + "External id": 984216,"Sequence number": 10552486, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17116 + } + }, + { + "ph": "s", "id": 433, "pid": 2338708, "tid": 2338708, "ts": 6345939550635.377, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939550654.612, "dur": 11.718, + "args": { + "External id": 984217,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939550660.099, "dur": 6.032, + "args": { + "External id": 984218,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939550742.198, "dur": 77.281, + "args": { + "External id": 984219,"Record function id": 0, "Ev Idx": 17119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939550820.750, "dur": 1147.784, + "args": { + "External id": 984220,"Record function id": 0, "Ev Idx": 17120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939550860.690, "dur": 1093.433, + "args": { + "External id": 984221,"Sequence number": 10552487, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17121 + } + }, + { + "ph": "s", "id": 432, "pid": 2338708, "tid": 2338708, "ts": 6345939550860.690, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939550929.341, "dur": 51.932, + "args": { + "External id": 984222,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939550994.264, "dur": 151.323, + "args": { + "External id": 984223,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939551164.252, "dur": 39.711, + "args": { + "External id": 984224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939551215.458, "dur": 29.043, + "args": { + "External id": 984225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939551274.595, "dur": 30.689, + "args": { + "External id": 984226,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939551327.993, "dur": 17.273, + "args": { + "External id": 984227,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939551370.028, "dur": 147.646, + "args": { + "External id": 984228,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939551427.265, "dur": 12.109, + "args": { + "External id": 984229,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939551433.144, "dur": 5.506, + "args": { + "External id": 984230,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939551442.768, "dur": 4.480, + "args": { + "External id": 984231,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939551448.480, "dur": 1.063, + "args": { + "External id": 984232,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939551454.247, "dur": 8.511, + "args": { + "External id": 984233,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939551528.551, "dur": 49.632, + "args": { + "External id": 984234,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939551610.166, "dur": 32.327, + "args": { + "External id": 984235,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939551653.333, "dur": 41.014, + "args": { + "External id": 984236,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939551704.052, "dur": 34.870, + "args": { + "External id": 984237,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939551767.880, "dur": 31.278, + "args": { + "External id": 984238,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939551805.027, "dur": 35.889, + "args": { + "External id": 984239,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939551859.675, "dur": 23.272, + "args": { + "External id": 984240,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17140 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2338708, "tid": 2338708, + "ts": 6345939552093.240, "dur": 88.230, + "args": { + "External id": 984241,"Record function id": 0, "Ev Idx": 17141 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939552263.060, "dur": 48.310, + "args": { + "External id": 984242,"Record function id": 0, "Ev Idx": 17142 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2338708, "tid": 2338708, + "ts": 6345939552321.314, "dur": 31600.853, + "args": { + "External id": 984243,"Record function id": 0, "Ev Idx": 17143 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2338708, "tid": 2338708, + "ts": 6345939552328.990, "dur": 974.290, + "args": { + "External id": 984244,"Record function id": 0, "Ev Idx": 17144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939552418.979, "dur": 9.908, + "args": { + "External id": 984245,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939552443.177, "dur": 41.947, + "args": { + "External id": 984246,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552449.485, "dur": 2.397, + "args": { + "External id": 984247,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552457.575, "dur": 0.389, + "args": { + "External id": 984248,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552458.924, "dur": 0.577, + "args": { + "External id": 984249,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552460.636, "dur": 0.468, + "args": { + "External id": 984250,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552464.228, "dur": 0.532, + "args": { + "External id": 984251,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552466.602, "dur": 0.623, + "args": { + "External id": 984252,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552468.655, "dur": 5.147, + "args": { + "External id": 984253,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552475.100, "dur": 0.310, + "args": { + "External id": 984254,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552476.650, "dur": 0.337, + "args": { + "External id": 984255,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939552498.387, "dur": 57.192, + "args": { + "External id": 984256,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939552588.103, "dur": 129.843, + "args": { + "External id": 984257,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939552599.283, "dur": 4.370, + "args": { + "External id": 984258,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939552608.909, "dur": 13.077, + "args": { + "External id": 984259,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939552615.969, "dur": 5.627, + "args": { + "External id": 984260,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552619.593, "dur": 0.857, + "args": { + "External id": 984261,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939552629.255, "dur": 35.769, + "args": { + "External id": 984262,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552631.773, "dur": 2.948, + "args": { + "External id": 984263,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552636.865, "dur": 0.694, + "args": { + "External id": 984264,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552638.649, "dur": 0.323, + "args": { + "External id": 984265,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552643.262, "dur": 2.963, + "args": { + "External id": 984266,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552647.256, "dur": 0.467, + "args": { + "External id": 984267,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552649.549, "dur": 0.473, + "args": { + "External id": 984268,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552653.328, "dur": 0.532, + "args": { + "External id": 984269,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552655.311, "dur": 0.517, + "args": { + "External id": 984270,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939552657.037, "dur": 2.530, + "args": { + "External id": 984271,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939552676.923, "dur": 33.053, + "args": { + "External id": 984272,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939552770.850, "dur": 421.621, + "args": { + "External id": 984273,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939552802.988, "dur": 383.943, + "args": { + "External id": 984274,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17174, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939552813.279, "dur": 367.554, + "args": { + "External id": 984275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939553221.305, "dur": 2.673, + "args": { + "External id": 984276,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17176, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2338708, "tid": 2338708, + "ts": 6345939553325.385, "dur": 30361.652, + "args": { + "External id": 984277,"Record function id": 0, "Ev Idx": 17177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939553435.896, "dur": 6.926, + "args": { + "External id": 984278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939553446.249, "dur": 1.057, + "args": { + "External id": 984279,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939553449.342, "dur": 3.560, + "args": { + "External id": 984280,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939553454.732, "dur": 1.026, + "args": { + "External id": 984281,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939553457.131, "dur": 0.979, + "args": { + "External id": 984282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939553459.578, "dur": 0.987, + "args": { + "External id": 984283,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939553464.538, "dur": 1.131, + "args": { + "External id": 984284,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939553467.367, "dur": 2.725, + "args": { + "External id": 984285,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939553471.381, "dur": 0.709, + "args": { + "External id": 984286,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939553473.619, "dur": 0.662, + "args": { + "External id": 984287,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939553494.913, "dur": 30130.411, + "args": { + "External id": 984288,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939553509.915, "dur": 30104.566, + "args": { + "External id": 984289,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939553526.291, "dur": 16.646, + "args": { + "External id": 984290,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939553546.477, "dur": 30022.160, + "args": { + "External id": 984291,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939553548.960, "dur": 30018.517, + "args": { + "External id": 984292,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939553555.555, "dur": 6.005, + "args": { + "External id": 984293,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939553563.300, "dur": 29999.843, + "args": { + "External id": 984294,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939583857.183, "dur": 37.462, + "args": { + "External id": 984295,"Sequence number": 10552488, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17195 + } + }, + { + "ph": "s", "id": 431, "pid": 2338708, "tid": 2338708, "ts": 6345939583857.183, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939583878.029, "dur": 11.711, + "args": { + "External id": 984296,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939583882.974, "dur": 6.560, + "args": { + "External id": 984297,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939583964.119, "dur": 121.924, + "args": { + "External id": 984298,"Record function id": 0, "Ev Idx": 17198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939584089.016, "dur": 1145.987, + "args": { + "External id": 984299,"Record function id": 0, "Ev Idx": 17199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939584135.382, "dur": 1084.860, + "args": { + "External id": 984300,"Sequence number": 10552489, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17200 + } + }, + { + "ph": "s", "id": 430, "pid": 2338708, "tid": 2338708, "ts": 6345939584135.382, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939584208.907, "dur": 53.199, + "args": { + "External id": 984301,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939584275.547, "dur": 105.903, + "args": { + "External id": 984302,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939584396.053, "dur": 38.444, + "args": { + "External id": 984303,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939584443.712, "dur": 29.349, + "args": { + "External id": 984304,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939584499.394, "dur": 26.232, + "args": { + "External id": 984305,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939584546.847, "dur": 16.830, + "args": { + "External id": 984306,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939584587.534, "dur": 138.074, + "args": { + "External id": 984307,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939584642.667, "dur": 11.827, + "args": { + "External id": 984308,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939584648.307, "dur": 5.379, + "args": { + "External id": 984309,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939584657.643, "dur": 4.793, + "args": { + "External id": 984310,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939584663.838, "dur": 0.956, + "args": { + "External id": 984311,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939584667.426, "dur": 6.343, + "args": { + "External id": 984312,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939584737.058, "dur": 46.177, + "args": { + "External id": 984313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939584817.275, "dur": 31.702, + "args": { + "External id": 984314,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939584858.814, "dur": 43.085, + "args": { + "External id": 984315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939584911.148, "dur": 34.514, + "args": { + "External id": 984316,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939584969.414, "dur": 29.210, + "args": { + "External id": 984317,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939585006.519, "dur": 91.823, + "args": { + "External id": 984318,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939585123.410, "dur": 21.012, + "args": { + "External id": 984319,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17219 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2338708, "tid": 2338708, + "ts": 6345939585305.075, "dur": 83.933, + "args": { + "External id": 984320,"Record function id": 0, "Ev Idx": 17220 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939585465.668, "dur": 48.369, + "args": { + "External id": 984321,"Record function id": 0, "Ev Idx": 17221 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2338708, "tid": 2338708, + "ts": 6345939585524.970, "dur": 30134.358, + "args": { + "External id": 984322,"Record function id": 0, "Ev Idx": 17222 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2338708, "tid": 2338708, + "ts": 6345939585534.527, "dur": 998.458, + "args": { + "External id": 984323,"Record function id": 0, "Ev Idx": 17223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939585620.346, "dur": 9.316, + "args": { + "External id": 984324,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939585643.213, "dur": 40.437, + "args": { + "External id": 984325,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585648.897, "dur": 2.200, + "args": { + "External id": 984326,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585656.869, "dur": 0.348, + "args": { + "External id": 984327,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585658.158, "dur": 0.589, + "args": { + "External id": 984328,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585660.370, "dur": 0.640, + "args": { + "External id": 984329,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585663.744, "dur": 0.530, + "args": { + "External id": 984330,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585665.881, "dur": 0.561, + "args": { + "External id": 984331,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585667.865, "dur": 4.505, + "args": { + "External id": 984332,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585674.092, "dur": 0.581, + "args": { + "External id": 984333,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585675.804, "dur": 0.359, + "args": { + "External id": 984334,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939585695.488, "dur": 53.044, + "args": { + "External id": 984335,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939585783.092, "dur": 127.102, + "args": { + "External id": 984336,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939585793.895, "dur": 4.171, + "args": { + "External id": 984337,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939585803.707, "dur": 13.714, + "args": { + "External id": 984338,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939585811.376, "dur": 5.641, + "args": { + "External id": 984339,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585814.964, "dur": 0.812, + "args": { + "External id": 984340,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939585824.091, "dur": 32.088, + "args": { + "External id": 984341,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585826.261, "dur": 2.953, + "args": { + "External id": 984342,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585830.824, "dur": 0.375, + "args": { + "External id": 984343,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585832.486, "dur": 0.400, + "args": { + "External id": 984344,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585836.458, "dur": 2.540, + "args": { + "External id": 984345,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585840.296, "dur": 0.243, + "args": { + "External id": 984346,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585841.766, "dur": 0.279, + "args": { + "External id": 984347,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585845.072, "dur": 0.319, + "args": { + "External id": 984348,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585846.627, "dur": 0.299, + "args": { + "External id": 984349,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939585848.135, "dur": 2.862, + "args": { + "External id": 984350,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939585870.668, "dur": 31.755, + "args": { + "External id": 984351,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939585965.451, "dur": 453.578, + "args": { + "External id": 984352,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939585997.099, "dur": 415.253, + "args": { + "External id": 984353,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17253, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939586026.763, "dur": 378.348, + "args": { + "External id": 984354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939586447.628, "dur": 2.757, + "args": { + "External id": 984355,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17255, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2338708, "tid": 2338708, + "ts": 6345939586556.458, "dur": 28881.760, + "args": { + "External id": 984356,"Record function id": 0, "Ev Idx": 17256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939586670.855, "dur": 7.172, + "args": { + "External id": 984357,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939586682.135, "dur": 1.181, + "args": { + "External id": 984358,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939586685.088, "dur": 3.412, + "args": { + "External id": 984359,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939586690.383, "dur": 1.031, + "args": { + "External id": 984360,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939586692.668, "dur": 1.009, + "args": { + "External id": 984361,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939586694.904, "dur": 1.011, + "args": { + "External id": 984362,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939586699.800, "dur": 1.016, + "args": { + "External id": 984363,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939586708.464, "dur": 3.105, + "args": { + "External id": 984364,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939586712.880, "dur": 0.903, + "args": { + "External id": 984365,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939586715.682, "dur": 0.945, + "args": { + "External id": 984366,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939586739.004, "dur": 28645.872, + "args": { + "External id": 984367,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939586754.267, "dur": 28621.274, + "args": { + "External id": 984368,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939586770.231, "dur": 16.543, + "args": { + "External id": 984369,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939586790.407, "dur": 28548.763, + "args": { + "External id": 984370,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939586793.025, "dur": 28545.166, + "args": { + "External id": 984371,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939586799.195, "dur": 5.999, + "args": { + "External id": 984372,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939586806.847, "dur": 28527.573, + "args": { + "External id": 984373,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939615599.351, "dur": 33.412, + "args": { + "External id": 984374,"Sequence number": 10552490, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17274 + } + }, + { + "ph": "s", "id": 429, "pid": 2338708, "tid": 2338708, "ts": 6345939615599.351, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939615617.276, "dur": 10.258, + "args": { + "External id": 984375,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939615621.829, "dur": 5.522, + "args": { + "External id": 984376,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939615701.281, "dur": 79.033, + "args": { + "External id": 984377,"Record function id": 0, "Ev Idx": 17277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939615781.678, "dur": 1159.564, + "args": { + "External id": 984378,"Record function id": 0, "Ev Idx": 17278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939615821.825, "dur": 1105.195, + "args": { + "External id": 984379,"Sequence number": 10552491, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17279 + } + }, + { + "ph": "s", "id": 428, "pid": 2338708, "tid": 2338708, "ts": 6345939615821.825, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939615888.085, "dur": 48.079, + "args": { + "External id": 984380,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939615948.404, "dur": 148.111, + "args": { + "External id": 984381,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939616116.022, "dur": 43.569, + "args": { + "External id": 984382,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939616169.943, "dur": 30.382, + "args": { + "External id": 984383,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939616231.296, "dur": 33.211, + "args": { + "External id": 984384,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939616286.078, "dur": 17.944, + "args": { + "External id": 984385,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939616332.278, "dur": 142.853, + "args": { + "External id": 984386,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939616387.474, "dur": 12.087, + "args": { + "External id": 984387,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939616392.805, "dur": 5.799, + "args": { + "External id": 984388,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939616402.200, "dur": 4.741, + "args": { + "External id": 984389,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939616408.427, "dur": 1.153, + "args": { + "External id": 984390,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939616412.260, "dur": 7.137, + "args": { + "External id": 984391,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939616489.025, "dur": 49.417, + "args": { + "External id": 984392,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939616575.756, "dur": 31.503, + "args": { + "External id": 984393,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939616617.612, "dur": 41.473, + "args": { + "External id": 984394,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939616670.899, "dur": 38.871, + "args": { + "External id": 984395,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939616736.019, "dur": 29.104, + "args": { + "External id": 984396,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939616771.972, "dur": 37.196, + "args": { + "External id": 984397,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939616832.161, "dur": 20.506, + "args": { + "External id": 984398,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17298 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2338708, "tid": 2338708, + "ts": 6345939617028.073, "dur": 131.333, + "args": { + "External id": 984399,"Record function id": 0, "Ev Idx": 17299 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939617242.983, "dur": 49.334, + "args": { + "External id": 984400,"Record function id": 0, "Ev Idx": 17300 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2338708, "tid": 2338708, + "ts": 6345939617302.523, "dur": 30340.020, + "args": { + "External id": 984401,"Record function id": 0, "Ev Idx": 17301 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2338708, "tid": 2338708, + "ts": 6345939617311.262, "dur": 990.748, + "args": { + "External id": 984402,"Record function id": 0, "Ev Idx": 17302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939617402.352, "dur": 10.536, + "args": { + "External id": 984403,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939617426.804, "dur": 40.346, + "args": { + "External id": 984404,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617432.708, "dur": 2.473, + "args": { + "External id": 984405,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617440.788, "dur": 0.382, + "args": { + "External id": 984406,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617442.461, "dur": 0.404, + "args": { + "External id": 984407,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617444.574, "dur": 0.349, + "args": { + "External id": 984408,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617447.974, "dur": 0.518, + "args": { + "External id": 984409,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617449.746, "dur": 0.655, + "args": { + "External id": 984410,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617451.548, "dur": 4.948, + "args": { + "External id": 984411,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617458.380, "dur": 0.434, + "args": { + "External id": 984412,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617459.703, "dur": 0.267, + "args": { + "External id": 984413,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939617481.016, "dur": 57.092, + "args": { + "External id": 984414,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939617571.008, "dur": 127.947, + "args": { + "External id": 984415,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939617582.395, "dur": 4.415, + "args": { + "External id": 984416,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939617593.239, "dur": 12.927, + "args": { + "External id": 984417,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939617600.091, "dur": 5.657, + "args": { + "External id": 984418,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617603.979, "dur": 0.604, + "args": { + "External id": 984419,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939617613.137, "dur": 33.322, + "args": { + "External id": 984420,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617615.068, "dur": 2.803, + "args": { + "External id": 984421,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617619.746, "dur": 0.455, + "args": { + "External id": 984422,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617621.593, "dur": 0.413, + "args": { + "External id": 984423,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617626.006, "dur": 3.204, + "args": { + "External id": 984424,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617630.184, "dur": 0.375, + "args": { + "External id": 984425,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617632.385, "dur": 0.481, + "args": { + "External id": 984426,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617636.108, "dur": 0.463, + "args": { + "External id": 984427,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617638.323, "dur": 0.409, + "args": { + "External id": 984428,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939617639.612, "dur": 2.620, + "args": { + "External id": 984429,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939617658.031, "dur": 32.299, + "args": { + "External id": 984430,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939617752.836, "dur": 437.682, + "args": { + "External id": 984431,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939617785.603, "dur": 398.422, + "args": { + "External id": 984432,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17332, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939617796.024, "dur": 380.479, + "args": { + "External id": 984433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939618219.650, "dur": 2.629, + "args": { + "External id": 984434,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17334, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2338708, "tid": 2338708, + "ts": 6345939618324.807, "dur": 29101.651, + "args": { + "External id": 984435,"Record function id": 0, "Ev Idx": 17335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939618436.444, "dur": 7.154, + "args": { + "External id": 984436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939618447.176, "dur": 0.817, + "args": { + "External id": 984437,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939618449.933, "dur": 3.838, + "args": { + "External id": 984438,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939618455.723, "dur": 0.695, + "args": { + "External id": 984439,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939618457.978, "dur": 0.918, + "args": { + "External id": 984440,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939618460.167, "dur": 1.085, + "args": { + "External id": 984441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939618465.422, "dur": 0.860, + "args": { + "External id": 984442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939618468.286, "dur": 1.726, + "args": { + "External id": 984443,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939618471.425, "dur": 0.714, + "args": { + "External id": 984444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939618473.738, "dur": 0.641, + "args": { + "External id": 984445,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939618495.088, "dur": 28880.310, + "args": { + "External id": 984446,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939618510.723, "dur": 28855.341, + "args": { + "External id": 984447,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939618526.488, "dur": 16.391, + "args": { + "External id": 984448,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939618546.458, "dur": 28782.640, + "args": { + "External id": 984449,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939618549.227, "dur": 28778.877, + "args": { + "External id": 984450,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939618560.160, "dur": 6.058, + "args": { + "External id": 984451,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939618568.267, "dur": 28755.977, + "args": { + "External id": 984452,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939647581.355, "dur": 34.532, + "args": { + "External id": 984453,"Sequence number": 10552492, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17353 + } + }, + { + "ph": "s", "id": 427, "pid": 2338708, "tid": 2338708, "ts": 6345939647581.355, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939647601.262, "dur": 9.897, + "args": { + "External id": 984454,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939647605.548, "dur": 5.439, + "args": { + "External id": 984455,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939647684.829, "dur": 76.593, + "args": { + "External id": 984456,"Record function id": 0, "Ev Idx": 17356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939647763.017, "dur": 1134.465, + "args": { + "External id": 984457,"Record function id": 0, "Ev Idx": 17357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939647804.906, "dur": 1078.260, + "args": { + "External id": 984458,"Sequence number": 10552493, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17358 + } + }, + { + "ph": "s", "id": 426, "pid": 2338708, "tid": 2338708, "ts": 6345939647804.906, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939647870.155, "dur": 48.287, + "args": { + "External id": 984459,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939647930.853, "dur": 115.698, + "args": { + "External id": 984460,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939648100.441, "dur": 47.243, + "args": { + "External id": 984461,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939648160.267, "dur": 31.715, + "args": { + "External id": 984462,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939648222.107, "dur": 30.972, + "args": { + "External id": 984463,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939648274.840, "dur": 18.793, + "args": { + "External id": 984464,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939648318.142, "dur": 139.941, + "args": { + "External id": 984465,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939648373.482, "dur": 12.227, + "args": { + "External id": 984466,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939648379.296, "dur": 5.663, + "args": { + "External id": 984467,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939648388.608, "dur": 4.522, + "args": { + "External id": 984468,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939648394.332, "dur": 0.804, + "args": { + "External id": 984469,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939648397.736, "dur": 6.376, + "args": { + "External id": 984470,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939648469.933, "dur": 45.709, + "args": { + "External id": 984471,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939648553.360, "dur": 30.078, + "args": { + "External id": 984472,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939648594.115, "dur": 41.401, + "args": { + "External id": 984473,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939648645.557, "dur": 34.633, + "args": { + "External id": 984474,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939648704.155, "dur": 27.942, + "args": { + "External id": 984475,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939648740.324, "dur": 35.154, + "args": { + "External id": 984476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939648793.857, "dur": 16.552, + "args": { + "External id": 984477,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17377 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2338708, "tid": 2338708, + "ts": 6345939648964.318, "dur": 140.753, + "args": { + "External id": 984478,"Record function id": 0, "Ev Idx": 17378 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939649188.884, "dur": 50.705, + "args": { + "External id": 984479,"Record function id": 0, "Ev Idx": 17379 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2338708, "tid": 2338708, + "ts": 6345939649249.545, "dur": 31509.260, + "args": { + "External id": 984480,"Record function id": 0, "Ev Idx": 17380 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2338708, "tid": 2338708, + "ts": 6345939649258.164, "dur": 1037.265, + "args": { + "External id": 984481,"Record function id": 0, "Ev Idx": 17381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939649350.321, "dur": 10.136, + "args": { + "External id": 984482,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939649373.880, "dur": 39.812, + "args": { + "External id": 984483,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649379.690, "dur": 2.606, + "args": { + "External id": 984484,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649387.351, "dur": 0.373, + "args": { + "External id": 984485,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649389.224, "dur": 0.427, + "args": { + "External id": 984486,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649391.581, "dur": 0.462, + "args": { + "External id": 984487,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649395.305, "dur": 0.376, + "args": { + "External id": 984488,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649397.302, "dur": 0.363, + "args": { + "External id": 984489,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649398.868, "dur": 4.825, + "args": { + "External id": 984490,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649405.311, "dur": 0.301, + "args": { + "External id": 984491,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649406.679, "dur": 0.558, + "args": { + "External id": 984492,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939649426.778, "dur": 56.531, + "args": { + "External id": 984493,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939649518.326, "dur": 127.538, + "args": { + "External id": 984494,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939649529.032, "dur": 3.633, + "args": { + "External id": 984495,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939649538.410, "dur": 13.006, + "args": { + "External id": 984496,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939649545.341, "dur": 5.653, + "args": { + "External id": 984497,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649549.142, "dur": 0.633, + "args": { + "External id": 984498,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939649558.194, "dur": 31.865, + "args": { + "External id": 984499,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649560.470, "dur": 2.799, + "args": { + "External id": 984500,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649565.066, "dur": 0.481, + "args": { + "External id": 984501,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649566.713, "dur": 0.466, + "args": { + "External id": 984502,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649570.976, "dur": 2.280, + "args": { + "External id": 984503,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649574.382, "dur": 0.247, + "args": { + "External id": 984504,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649575.939, "dur": 0.380, + "args": { + "External id": 984505,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649579.909, "dur": 0.300, + "args": { + "External id": 984506,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649581.510, "dur": 0.440, + "args": { + "External id": 984507,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939649583.017, "dur": 2.422, + "args": { + "External id": 984508,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939649603.078, "dur": 34.374, + "args": { + "External id": 984509,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939649702.183, "dur": 481.645, + "args": { + "External id": 984510,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939649733.960, "dur": 443.964, + "args": { + "External id": 984511,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17411, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939649744.861, "dur": 423.125, + "args": { + "External id": 984512,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939650212.493, "dur": 2.868, + "args": { + "External id": 984513,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17413, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2338708, "tid": 2338708, + "ts": 6345939650317.789, "dur": 30206.737, + "args": { + "External id": 984514,"Record function id": 0, "Ev Idx": 17414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939650431.819, "dur": 7.040, + "args": { + "External id": 984515,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939650442.570, "dur": 1.219, + "args": { + "External id": 984516,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939650445.543, "dur": 3.778, + "args": { + "External id": 984517,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939650451.030, "dur": 0.815, + "args": { + "External id": 984518,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939650453.436, "dur": 0.876, + "args": { + "External id": 984519,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939650456.041, "dur": 0.797, + "args": { + "External id": 984520,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939650460.083, "dur": 1.132, + "args": { + "External id": 984521,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939650462.658, "dur": 2.171, + "args": { + "External id": 984522,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939650466.272, "dur": 1.222, + "args": { + "External id": 984523,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939650469.049, "dur": 0.863, + "args": { + "External id": 984524,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939650490.858, "dur": 29970.439, + "args": { + "External id": 984525,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939650506.915, "dur": 29943.262, + "args": { + "External id": 984526,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939650522.983, "dur": 16.731, + "args": { + "External id": 984527,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939650543.206, "dur": 29863.120, + "args": { + "External id": 984528,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939650545.754, "dur": 29859.428, + "args": { + "External id": 984529,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939650551.996, "dur": 5.546, + "args": { + "External id": 984530,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939650559.170, "dur": 29841.604, + "args": { + "External id": 984531,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939680694.935, "dur": 36.796, + "args": { + "External id": 984532,"Sequence number": 10552494, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17432 + } + }, + { + "ph": "s", "id": 425, "pid": 2338708, "tid": 2338708, "ts": 6345939680694.935, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939680715.156, "dur": 12.001, + "args": { + "External id": 984533,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939680720.281, "dur": 6.552, + "args": { + "External id": 984534,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939680801.513, "dur": 71.142, + "args": { + "External id": 984535,"Record function id": 0, "Ev Idx": 17435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939680874.355, "dur": 1152.171, + "args": { + "External id": 984536,"Record function id": 0, "Ev Idx": 17436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939680916.177, "dur": 1079.624, + "args": { + "External id": 984537,"Sequence number": 10552495, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17437 + } + }, + { + "ph": "s", "id": 424, "pid": 2338708, "tid": 2338708, "ts": 6345939680916.177, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939680984.722, "dur": 100.341, + "args": { + "External id": 984538,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939681103.893, "dur": 97.178, + "args": { + "External id": 984539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939681215.183, "dur": 37.007, + "args": { + "External id": 984540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939681261.979, "dur": 29.834, + "args": { + "External id": 984541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939681324.944, "dur": 33.395, + "args": { + "External id": 984542,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939681377.329, "dur": 19.902, + "args": { + "External id": 984543,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939681421.863, "dur": 141.283, + "args": { + "External id": 984544,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939681477.744, "dur": 12.366, + "args": { + "External id": 984545,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939681483.856, "dur": 5.500, + "args": { + "External id": 984546,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939681492.904, "dur": 4.443, + "args": { + "External id": 984547,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939681498.842, "dur": 1.219, + "args": { + "External id": 984548,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939681503.086, "dur": 7.990, + "args": { + "External id": 984549,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939681574.798, "dur": 48.591, + "args": { + "External id": 984550,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939681656.604, "dur": 32.028, + "args": { + "External id": 984551,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939681701.679, "dur": 41.634, + "args": { + "External id": 984552,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939681750.085, "dur": 35.837, + "args": { + "External id": 984553,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939681808.536, "dur": 27.809, + "args": { + "External id": 984554,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939681847.470, "dur": 36.863, + "args": { + "External id": 984555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939681902.987, "dur": 22.173, + "args": { + "External id": 984556,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17456 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2338708, "tid": 2338708, + "ts": 6345939682132.683, "dur": 88.694, + "args": { + "External id": 984557,"Record function id": 0, "Ev Idx": 17457 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939682298.813, "dur": 47.834, + "args": { + "External id": 984558,"Record function id": 0, "Ev Idx": 17458 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2338708, "tid": 2338708, + "ts": 6345939682356.476, "dur": 30604.121, + "args": { + "External id": 984559,"Record function id": 0, "Ev Idx": 17459 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2338708, "tid": 2338708, + "ts": 6345939682364.918, "dur": 1002.060, + "args": { + "External id": 984560,"Record function id": 0, "Ev Idx": 17460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939682455.735, "dur": 9.248, + "args": { + "External id": 984561,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939682479.147, "dur": 39.456, + "args": { + "External id": 984562,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682485.207, "dur": 2.345, + "args": { + "External id": 984563,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682492.955, "dur": 0.394, + "args": { + "External id": 984564,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682494.695, "dur": 0.557, + "args": { + "External id": 984565,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682497.026, "dur": 0.479, + "args": { + "External id": 984566,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682500.461, "dur": 0.466, + "args": { + "External id": 984567,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682502.362, "dur": 0.354, + "args": { + "External id": 984568,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682504.147, "dur": 4.766, + "args": { + "External id": 984569,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682510.442, "dur": 0.283, + "args": { + "External id": 984570,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682511.956, "dur": 0.404, + "args": { + "External id": 984571,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939682531.006, "dur": 63.395, + "args": { + "External id": 984572,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939682627.718, "dur": 141.378, + "args": { + "External id": 984573,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939682639.321, "dur": 4.226, + "args": { + "External id": 984574,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939682649.572, "dur": 28.431, + "args": { + "External id": 984575,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939682656.498, "dur": 21.084, + "args": { + "External id": 984576,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682675.787, "dur": 0.638, + "args": { + "External id": 984577,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939682684.831, "dur": 29.712, + "args": { + "External id": 984578,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682686.721, "dur": 2.881, + "args": { + "External id": 984579,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682690.880, "dur": 0.356, + "args": { + "External id": 984580,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682692.308, "dur": 0.310, + "args": { + "External id": 984581,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682696.422, "dur": 3.004, + "args": { + "External id": 984582,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682700.227, "dur": 0.373, + "args": { + "External id": 984583,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682701.957, "dur": 0.330, + "args": { + "External id": 984584,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682705.061, "dur": 0.316, + "args": { + "External id": 984585,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682706.534, "dur": 0.392, + "args": { + "External id": 984586,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939682707.914, "dur": 2.273, + "args": { + "External id": 984587,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939682726.505, "dur": 34.776, + "args": { + "External id": 984588,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939682823.291, "dur": 434.898, + "args": { + "External id": 984589,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939682854.669, "dur": 397.498, + "args": { + "External id": 984590,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17490, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939682864.956, "dur": 380.371, + "args": { + "External id": 984591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939683286.912, "dur": 2.604, + "args": { + "External id": 984592,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17492, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2338708, "tid": 2338708, + "ts": 6345939683390.260, "dur": 29355.834, + "args": { + "External id": 984593,"Record function id": 0, "Ev Idx": 17493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939683500.824, "dur": 7.328, + "args": { + "External id": 984594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939683511.722, "dur": 1.165, + "args": { + "External id": 984595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939683514.367, "dur": 3.658, + "args": { + "External id": 984596,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939683519.808, "dur": 0.869, + "args": { + "External id": 984597,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939683522.522, "dur": 0.861, + "args": { + "External id": 984598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939683524.642, "dur": 1.281, + "args": { + "External id": 984599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939683529.701, "dur": 0.697, + "args": { + "External id": 984600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939683532.150, "dur": 2.857, + "args": { + "External id": 984601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939683536.244, "dur": 0.760, + "args": { + "External id": 984602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939683538.830, "dur": 1.088, + "args": { + "External id": 984603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939683560.323, "dur": 29134.370, + "args": { + "External id": 984604,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939683574.941, "dur": 29109.993, + "args": { + "External id": 984605,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939683595.407, "dur": 16.684, + "args": { + "External id": 984606,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939683615.650, "dur": 29030.581, + "args": { + "External id": 984607,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939683617.990, "dur": 29027.374, + "args": { + "External id": 984608,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939683624.083, "dur": 5.635, + "args": { + "External id": 984609,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939683631.764, "dur": 29009.716, + "args": { + "External id": 984610,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939712900.907, "dur": 33.885, + "args": { + "External id": 984611,"Sequence number": 10552496, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17511 + } + }, + { + "ph": "s", "id": 423, "pid": 2338708, "tid": 2338708, "ts": 6345939712900.907, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939712919.946, "dur": 10.065, + "args": { + "External id": 984612,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939712924.408, "dur": 5.398, + "args": { + "External id": 984613,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939713002.214, "dur": 121.966, + "args": { + "External id": 984614,"Record function id": 0, "Ev Idx": 17514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939713126.925, "dur": 1147.923, + "args": { + "External id": 984615,"Record function id": 0, "Ev Idx": 17515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939713171.287, "dur": 1089.206, + "args": { + "External id": 984616,"Sequence number": 10552497, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17516 + } + }, + { + "ph": "s", "id": 422, "pid": 2338708, "tid": 2338708, "ts": 6345939713171.287, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939713245.026, "dur": 52.587, + "args": { + "External id": 984617,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939713310.615, "dur": 105.611, + "args": { + "External id": 984618,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939713430.277, "dur": 40.138, + "args": { + "External id": 984619,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939713480.542, "dur": 29.262, + "args": { + "External id": 984620,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939713537.309, "dur": 26.458, + "args": { + "External id": 984621,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939713584.209, "dur": 17.880, + "args": { + "External id": 984622,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939713625.726, "dur": 132.196, + "args": { + "External id": 984623,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939713677.599, "dur": 11.441, + "args": { + "External id": 984624,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939713682.933, "dur": 5.384, + "args": { + "External id": 984625,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939713691.747, "dur": 4.912, + "args": { + "External id": 984626,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939713697.734, "dur": 0.915, + "args": { + "External id": 984627,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939713701.316, "dur": 5.339, + "args": { + "External id": 984628,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939713769.004, "dur": 46.642, + "args": { + "External id": 984629,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939713850.239, "dur": 31.752, + "args": { + "External id": 984630,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939713892.300, "dur": 42.969, + "args": { + "External id": 984631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939713944.201, "dur": 35.182, + "args": { + "External id": 984632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939714003.341, "dur": 89.302, + "args": { + "External id": 984633,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939714104.068, "dur": 43.306, + "args": { + "External id": 984634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939714169.141, "dur": 18.321, + "args": { + "External id": 984635,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17535 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2338708, "tid": 2338708, + "ts": 6345939714344.304, "dur": 85.342, + "args": { + "External id": 984636,"Record function id": 0, "Ev Idx": 17536 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939714505.938, "dur": 46.112, + "args": { + "External id": 984637,"Record function id": 0, "Ev Idx": 17537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2338708, "tid": 2338708, + "ts": 6345939714561.678, "dur": 31211.354, + "args": { + "External id": 984638,"Record function id": 0, "Ev Idx": 17538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2338708, "tid": 2338708, + "ts": 6345939714570.241, "dur": 974.159, + "args": { + "External id": 984639,"Record function id": 0, "Ev Idx": 17539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939714659.425, "dur": 9.550, + "args": { + "External id": 984640,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939714682.551, "dur": 38.288, + "args": { + "External id": 984641,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714688.247, "dur": 2.252, + "args": { + "External id": 984642,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714694.905, "dur": 0.388, + "args": { + "External id": 984643,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714696.478, "dur": 0.401, + "args": { + "External id": 984644,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714698.459, "dur": 0.530, + "args": { + "External id": 984645,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714701.951, "dur": 0.488, + "args": { + "External id": 984646,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714704.054, "dur": 0.317, + "args": { + "External id": 984647,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714705.446, "dur": 4.668, + "args": { + "External id": 984648,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714711.946, "dur": 0.265, + "args": { + "External id": 984649,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714713.116, "dur": 0.294, + "args": { + "External id": 984650,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939714733.002, "dur": 51.810, + "args": { + "External id": 984651,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939714825.738, "dur": 124.525, + "args": { + "External id": 984652,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939714837.204, "dur": 5.150, + "args": { + "External id": 984653,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939714848.153, "dur": 10.498, + "args": { + "External id": 984654,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939714852.452, "dur": 5.757, + "args": { + "External id": 984655,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714856.179, "dur": 0.630, + "args": { + "External id": 984656,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939714865.348, "dur": 31.461, + "args": { + "External id": 984657,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714867.480, "dur": 0.616, + "args": { + "External id": 984658,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714869.956, "dur": 2.563, + "args": { + "External id": 984659,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714873.491, "dur": 0.687, + "args": { + "External id": 984660,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714875.679, "dur": 2.854, + "args": { + "External id": 984661,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714882.179, "dur": 0.436, + "args": { + "External id": 984662,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714884.103, "dur": 0.439, + "args": { + "External id": 984663,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714885.766, "dur": 0.271, + "args": { + "External id": 984664,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714889.780, "dur": 0.343, + "args": { + "External id": 984665,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939714891.317, "dur": 0.538, + "args": { + "External id": 984666,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939714909.867, "dur": 32.206, + "args": { + "External id": 984667,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939715003.754, "dur": 438.827, + "args": { + "External id": 984668,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939715090.579, "dur": 346.725, + "args": { + "External id": 984669,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17569, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939715102.367, "dur": 329.474, + "args": { + "External id": 984670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939715466.755, "dur": 2.360, + "args": { + "External id": 984671,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17571, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2338708, "tid": 2338708, + "ts": 6345939715565.285, "dur": 29989.250, + "args": { + "External id": 984672,"Record function id": 0, "Ev Idx": 17572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939715674.558, "dur": 6.771, + "args": { + "External id": 984673,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939715684.301, "dur": 1.058, + "args": { + "External id": 984674,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939715686.743, "dur": 3.127, + "args": { + "External id": 984675,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939715691.635, "dur": 0.668, + "args": { + "External id": 984676,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939715693.383, "dur": 1.164, + "args": { + "External id": 984677,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939715698.198, "dur": 0.795, + "args": { + "External id": 984678,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939715700.556, "dur": 0.866, + "args": { + "External id": 984679,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939715702.985, "dur": 2.342, + "args": { + "External id": 984680,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939715706.440, "dur": 0.911, + "args": { + "External id": 984681,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939715711.022, "dur": 0.993, + "args": { + "External id": 984682,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939715729.455, "dur": 29774.619, + "args": { + "External id": 984683,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939715744.321, "dur": 29750.230, + "args": { + "External id": 984684,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939715762.870, "dur": 16.839, + "args": { + "External id": 984685,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939715783.272, "dur": 29676.142, + "args": { + "External id": 984686,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939715785.964, "dur": 29672.556, + "args": { + "External id": 984687,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939715792.439, "dur": 5.639, + "args": { + "External id": 984688,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939715799.974, "dur": 29654.726, + "args": { + "External id": 984689,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939745711.262, "dur": 32.875, + "args": { + "External id": 984690,"Sequence number": 10552498, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17590 + } + }, + { + "ph": "s", "id": 421, "pid": 2338708, "tid": 2338708, "ts": 6345939745711.262, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939745729.147, "dur": 10.153, + "args": { + "External id": 984691,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939745733.397, "dur": 5.697, + "args": { + "External id": 984692,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939745815.303, "dur": 76.225, + "args": { + "External id": 984693,"Record function id": 0, "Ev Idx": 17593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939745892.663, "dur": 1139.578, + "args": { + "External id": 984694,"Record function id": 0, "Ev Idx": 17594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939745936.480, "dur": 1064.315, + "args": { + "External id": 984695,"Sequence number": 10552499, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17595 + } + }, + { + "ph": "s", "id": 420, "pid": 2338708, "tid": 2338708, "ts": 6345939745936.480, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939746002.355, "dur": 104.656, + "args": { + "External id": 984696,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939746124.739, "dur": 108.585, + "args": { + "External id": 984697,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939746248.426, "dur": 36.424, + "args": { + "External id": 984698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939746295.042, "dur": 28.658, + "args": { + "External id": 984699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939746356.439, "dur": 27.424, + "args": { + "External id": 984700,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939746402.482, "dur": 16.923, + "args": { + "External id": 984701,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939746445.020, "dur": 135.332, + "args": { + "External id": 984702,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939746497.146, "dur": 11.723, + "args": { + "External id": 984703,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939746502.975, "dur": 5.148, + "args": { + "External id": 984704,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939746511.795, "dur": 4.787, + "args": { + "External id": 984705,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939746517.867, "dur": 0.901, + "args": { + "External id": 984706,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939746523.388, "dur": 7.108, + "args": { + "External id": 984707,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939746591.736, "dur": 46.703, + "args": { + "External id": 984708,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939746670.236, "dur": 30.729, + "args": { + "External id": 984709,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939746713.381, "dur": 41.769, + "args": { + "External id": 984710,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939746761.598, "dur": 33.875, + "args": { + "External id": 984711,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939746820.168, "dur": 28.538, + "args": { + "External id": 984712,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939746854.152, "dur": 35.339, + "args": { + "External id": 984713,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939746908.142, "dur": 17.462, + "args": { + "External id": 984714,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17614 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2338708, "tid": 2338708, + "ts": 6345939747138.164, "dur": 84.679, + "args": { + "External id": 984715,"Record function id": 0, "Ev Idx": 17615 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939747298.996, "dur": 47.972, + "args": { + "External id": 984716,"Record function id": 0, "Ev Idx": 17616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2338708, "tid": 2338708, + "ts": 6345939747356.689, "dur": 31027.156, + "args": { + "External id": 984717,"Record function id": 0, "Ev Idx": 17617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2338708, "tid": 2338708, + "ts": 6345939747364.243, "dur": 980.039, + "args": { + "External id": 984718,"Record function id": 0, "Ev Idx": 17618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939747452.485, "dur": 9.139, + "args": { + "External id": 984719,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939747475.495, "dur": 45.729, + "args": { + "External id": 984720,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747480.897, "dur": 2.375, + "args": { + "External id": 984721,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747489.137, "dur": 0.765, + "args": { + "External id": 984722,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747493.925, "dur": 0.357, + "args": { + "External id": 984723,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747495.997, "dur": 0.466, + "args": { + "External id": 984724,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747499.931, "dur": 0.485, + "args": { + "External id": 984725,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747501.806, "dur": 0.470, + "args": { + "External id": 984726,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747503.227, "dur": 5.337, + "args": { + "External id": 984727,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747510.855, "dur": 0.531, + "args": { + "External id": 984728,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747512.853, "dur": 0.642, + "args": { + "External id": 984729,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939747533.795, "dur": 58.148, + "args": { + "External id": 984730,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939747624.202, "dur": 123.945, + "args": { + "External id": 984731,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939747635.138, "dur": 3.792, + "args": { + "External id": 984732,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939747647.330, "dur": 10.157, + "args": { + "External id": 984733,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939747651.706, "dur": 5.387, + "args": { + "External id": 984734,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747655.373, "dur": 0.494, + "args": { + "External id": 984735,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939747664.328, "dur": 31.670, + "args": { + "External id": 984736,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747666.252, "dur": 2.706, + "args": { + "External id": 984737,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747671.013, "dur": 0.441, + "args": { + "External id": 984738,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747672.319, "dur": 0.354, + "args": { + "External id": 984739,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747676.023, "dur": 2.173, + "args": { + "External id": 984740,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747679.178, "dur": 0.335, + "args": { + "External id": 984741,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747680.993, "dur": 0.425, + "args": { + "External id": 984742,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747684.182, "dur": 0.331, + "args": { + "External id": 984743,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747685.958, "dur": 0.342, + "args": { + "External id": 984744,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939747687.093, "dur": 2.458, + "args": { + "External id": 984745,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939747707.386, "dur": 32.332, + "args": { + "External id": 984746,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939747800.573, "dur": 433.119, + "args": { + "External id": 984747,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939747831.904, "dur": 395.960, + "args": { + "External id": 984748,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17648, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939747842.135, "dur": 379.596, + "args": { + "External id": 984749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939748261.508, "dur": 2.767, + "args": { + "External id": 984750,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17650, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2338708, "tid": 2338708, + "ts": 6345939748367.390, "dur": 29779.641, + "args": { + "External id": 984751,"Record function id": 0, "Ev Idx": 17651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939748477.415, "dur": 7.029, + "args": { + "External id": 984752,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939748488.136, "dur": 1.255, + "args": { + "External id": 984753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939748490.885, "dur": 3.444, + "args": { + "External id": 984754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939748495.925, "dur": 1.017, + "args": { + "External id": 984755,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939748498.158, "dur": 1.324, + "args": { + "External id": 984756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939748500.638, "dur": 0.751, + "args": { + "External id": 984757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939748505.138, "dur": 0.760, + "args": { + "External id": 984758,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939748507.650, "dur": 2.659, + "args": { + "External id": 984759,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939748511.587, "dur": 1.202, + "args": { + "External id": 984760,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939748514.329, "dur": 0.924, + "args": { + "External id": 984761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939748536.596, "dur": 29548.429, + "args": { + "External id": 984762,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939748552.536, "dur": 29488.841, + "args": { + "External id": 984763,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939748570.934, "dur": 16.265, + "args": { + "External id": 984764,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939748590.716, "dur": 29395.248, + "args": { + "External id": 984765,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939748593.312, "dur": 29391.509, + "args": { + "External id": 984766,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939748600.140, "dur": 6.122, + "args": { + "External id": 984767,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939748607.858, "dur": 29372.289, + "args": { + "External id": 984768,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939778315.020, "dur": 41.027, + "args": { + "External id": 984769,"Sequence number": 10552500, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17669 + } + }, + { + "ph": "s", "id": 419, "pid": 2338708, "tid": 2338708, "ts": 6345939778315.020, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939778338.983, "dur": 11.885, + "args": { + "External id": 984770,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939778344.154, "dur": 6.428, + "args": { + "External id": 984771,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939778425.736, "dur": 75.851, + "args": { + "External id": 984772,"Record function id": 0, "Ev Idx": 17672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939778502.845, "dur": 1142.092, + "args": { + "External id": 984773,"Record function id": 0, "Ev Idx": 17673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939778544.067, "dur": 1086.932, + "args": { + "External id": 984774,"Sequence number": 10552501, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17674 + } + }, + { + "ph": "s", "id": 418, "pid": 2338708, "tid": 2338708, "ts": 6345939778544.067, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939778610.565, "dur": 50.950, + "args": { + "External id": 984775,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939778676.120, "dur": 105.196, + "args": { + "External id": 984776,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939778792.968, "dur": 37.337, + "args": { + "External id": 984777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939778842.260, "dur": 29.043, + "args": { + "External id": 984778,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939778898.829, "dur": 29.587, + "args": { + "External id": 984779,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939778948.963, "dur": 17.486, + "args": { + "External id": 984780,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939778989.949, "dur": 198.201, + "args": { + "External id": 984781,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939779101.356, "dur": 13.434, + "args": { + "External id": 984782,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939779106.988, "dur": 6.634, + "args": { + "External id": 984783,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939779117.576, "dur": 3.934, + "args": { + "External id": 984784,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939779122.675, "dur": 0.878, + "args": { + "External id": 984785,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939779126.179, "dur": 6.183, + "args": { + "External id": 984786,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939779200.046, "dur": 55.869, + "args": { + "External id": 984787,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939779290.494, "dur": 32.146, + "args": { + "External id": 984788,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939779335.490, "dur": 43.359, + "args": { + "External id": 984789,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939779388.659, "dur": 35.024, + "args": { + "External id": 984790,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939779448.475, "dur": 28.846, + "args": { + "External id": 984791,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939779485.237, "dur": 36.187, + "args": { + "External id": 984792,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939779540.392, "dur": 17.693, + "args": { + "External id": 984793,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17693 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2338708, "tid": 2338708, + "ts": 6345939779714.821, "dur": 82.873, + "args": { + "External id": 984794,"Record function id": 0, "Ev Idx": 17694 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939779873.737, "dur": 46.562, + "args": { + "External id": 984795,"Record function id": 0, "Ev Idx": 17695 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2338708, "tid": 2338708, + "ts": 6345939779929.075, "dur": 30811.440, + "args": { + "External id": 984796,"Record function id": 0, "Ev Idx": 17696 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2338708, "tid": 2338708, + "ts": 6345939779938.693, "dur": 971.653, + "args": { + "External id": 984797,"Record function id": 0, "Ev Idx": 17697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939780047.955, "dur": 44.438, + "args": { + "External id": 984798,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939780110.443, "dur": 40.804, + "args": { + "External id": 984799,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780116.327, "dur": 2.572, + "args": { + "External id": 984800,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780124.042, "dur": 0.481, + "args": { + "External id": 984801,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780125.533, "dur": 0.699, + "args": { + "External id": 984802,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780127.806, "dur": 0.657, + "args": { + "External id": 984803,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780131.951, "dur": 0.438, + "args": { + "External id": 984804,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780133.871, "dur": 0.491, + "args": { + "External id": 984805,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780135.670, "dur": 4.375, + "args": { + "External id": 984806,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780141.750, "dur": 0.434, + "args": { + "External id": 984807,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780143.275, "dur": 0.371, + "args": { + "External id": 984808,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939780165.504, "dur": 60.372, + "args": { + "External id": 984809,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939780263.826, "dur": 124.565, + "args": { + "External id": 984810,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939780275.198, "dur": 4.956, + "args": { + "External id": 984811,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939780285.826, "dur": 10.750, + "args": { + "External id": 984812,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939780290.520, "dur": 5.655, + "args": { + "External id": 984813,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780294.073, "dur": 0.831, + "args": { + "External id": 984814,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939780303.698, "dur": 31.999, + "args": { + "External id": 984815,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780306.115, "dur": 2.039, + "args": { + "External id": 984816,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780310.073, "dur": 0.402, + "args": { + "External id": 984817,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780311.740, "dur": 0.580, + "args": { + "External id": 984818,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780315.917, "dur": 2.323, + "args": { + "External id": 984819,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780319.209, "dur": 0.648, + "args": { + "External id": 984820,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780321.220, "dur": 0.309, + "args": { + "External id": 984821,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780324.382, "dur": 0.322, + "args": { + "External id": 984822,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780325.764, "dur": 0.291, + "args": { + "External id": 984823,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939780327.415, "dur": 2.629, + "args": { + "External id": 984824,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939780348.372, "dur": 32.354, + "args": { + "External id": 984825,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939780444.247, "dur": 372.747, + "args": { + "External id": 984826,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939780475.371, "dur": 336.904, + "args": { + "External id": 984827,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17727, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939780485.702, "dur": 321.090, + "args": { + "External id": 984828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939780839.220, "dur": 2.641, + "args": { + "External id": 984829,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17729, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2338708, "tid": 2338708, + "ts": 6345939780930.550, "dur": 29587.297, + "args": { + "External id": 984830,"Record function id": 0, "Ev Idx": 17730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939781089.918, "dur": 7.497, + "args": { + "External id": 984831,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939781101.806, "dur": 0.995, + "args": { + "External id": 984832,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939781104.415, "dur": 3.554, + "args": { + "External id": 984833,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939781109.402, "dur": 0.834, + "args": { + "External id": 984834,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939781111.525, "dur": 0.911, + "args": { + "External id": 984835,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939781113.916, "dur": 1.080, + "args": { + "External id": 984836,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939781119.242, "dur": 0.647, + "args": { + "External id": 984837,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939781121.689, "dur": 2.402, + "args": { + "External id": 984838,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939781125.471, "dur": 1.131, + "args": { + "External id": 984839,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939781128.250, "dur": 0.838, + "args": { + "External id": 984840,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939781151.698, "dur": 29315.695, + "args": { + "External id": 984841,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939781167.702, "dur": 29289.903, + "args": { + "External id": 984842,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939781186.082, "dur": 16.880, + "args": { + "External id": 984843,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939781206.409, "dur": 29214.375, + "args": { + "External id": 984844,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939781209.013, "dur": 29210.849, + "args": { + "External id": 984845,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939781215.369, "dur": 6.193, + "args": { + "External id": 984846,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939781223.217, "dur": 29192.455, + "args": { + "External id": 984847,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939810680.227, "dur": 33.957, + "args": { + "External id": 984848,"Sequence number": 10552502, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17748 + } + }, + { + "ph": "s", "id": 417, "pid": 2338708, "tid": 2338708, "ts": 6345939810680.227, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939810699.072, "dur": 10.451, + "args": { + "External id": 984849,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939810703.759, "dur": 5.557, + "args": { + "External id": 984850,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939810781.333, "dur": 76.112, + "args": { + "External id": 984851,"Record function id": 0, "Ev Idx": 17751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939810859.173, "dur": 1109.505, + "args": { + "External id": 984852,"Record function id": 0, "Ev Idx": 17752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939810898.693, "dur": 1056.251, + "args": { + "External id": 984853,"Sequence number": 10552503, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17753 + } + }, + { + "ph": "s", "id": 416, "pid": 2338708, "tid": 2338708, "ts": 6345939810898.693, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939810964.982, "dur": 66.410, + "args": { + "External id": 984854,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939811047.443, "dur": 131.896, + "args": { + "External id": 984855,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939811199.350, "dur": 37.097, + "args": { + "External id": 984856,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939811246.406, "dur": 29.088, + "args": { + "External id": 984857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939811306.791, "dur": 29.537, + "args": { + "External id": 984858,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939811354.705, "dur": 16.531, + "args": { + "External id": 984859,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939811395.831, "dur": 140.390, + "args": { + "External id": 984860,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939811452.853, "dur": 12.099, + "args": { + "External id": 984861,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939811458.476, "dur": 5.696, + "args": { + "External id": 984862,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939811467.848, "dur": 4.131, + "args": { + "External id": 984863,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939811473.488, "dur": 0.835, + "args": { + "External id": 984864,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939811478.688, "dur": 6.060, + "args": { + "External id": 984865,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939811546.484, "dur": 47.670, + "args": { + "External id": 984866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939811627.996, "dur": 29.281, + "args": { + "External id": 984867,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939811667.708, "dur": 41.139, + "args": { + "External id": 984868,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939811717.917, "dur": 34.830, + "args": { + "External id": 984869,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939811777.358, "dur": 29.578, + "args": { + "External id": 984870,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939811813.181, "dur": 34.680, + "args": { + "External id": 984871,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939811865.936, "dur": 20.089, + "args": { + "External id": 984872,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2338708, "tid": 2338708, + "ts": 6345939812088.638, "dur": 89.570, + "args": { + "External id": 984873,"Record function id": 0, "Ev Idx": 17773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939812261.613, "dur": 47.488, + "args": { + "External id": 984874,"Record function id": 0, "Ev Idx": 17774 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2338708, "tid": 2338708, + "ts": 6345939812318.464, "dur": 32261.539, + "args": { + "External id": 984875,"Record function id": 0, "Ev Idx": 17775 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2338708, "tid": 2338708, + "ts": 6345939812326.992, "dur": 977.186, + "args": { + "External id": 984876,"Record function id": 0, "Ev Idx": 17776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939812417.870, "dur": 9.391, + "args": { + "External id": 984877,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939812441.283, "dur": 44.027, + "args": { + "External id": 984878,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812447.332, "dur": 2.253, + "args": { + "External id": 984879,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812454.028, "dur": 0.450, + "args": { + "External id": 984880,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812455.637, "dur": 0.567, + "args": { + "External id": 984881,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812457.838, "dur": 0.640, + "args": { + "External id": 984882,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812461.004, "dur": 0.493, + "args": { + "External id": 984883,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812467.471, "dur": 0.728, + "args": { + "External id": 984884,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812469.417, "dur": 3.891, + "args": { + "External id": 984885,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812475.107, "dur": 0.279, + "args": { + "External id": 984886,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812478.468, "dur": 0.427, + "args": { + "External id": 984887,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939812497.650, "dur": 56.373, + "args": { + "External id": 984888,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939812587.480, "dur": 126.533, + "args": { + "External id": 984889,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939812598.735, "dur": 4.336, + "args": { + "External id": 984890,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939812612.024, "dur": 13.384, + "args": { + "External id": 984891,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939812616.645, "dur": 8.326, + "args": { + "External id": 984892,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812620.707, "dur": 2.932, + "args": { + "External id": 984893,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939812632.974, "dur": 29.919, + "args": { + "External id": 984894,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812635.245, "dur": 0.773, + "args": { + "External id": 984895,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812637.487, "dur": 0.831, + "args": { + "External id": 984896,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812639.169, "dur": 0.641, + "args": { + "External id": 984897,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812642.938, "dur": 3.315, + "args": { + "External id": 984898,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812647.912, "dur": 0.332, + "args": { + "External id": 984899,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812650.092, "dur": 0.625, + "args": { + "External id": 984900,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812652.219, "dur": 0.318, + "args": { + "External id": 984901,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812654.347, "dur": 0.334, + "args": { + "External id": 984902,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939812657.879, "dur": 0.391, + "args": { + "External id": 984903,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939812673.869, "dur": 32.510, + "args": { + "External id": 984904,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939812768.557, "dur": 425.397, + "args": { + "External id": 984905,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939812799.455, "dur": 389.127, + "args": { + "External id": 984906,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17806, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939812809.820, "dur": 371.794, + "args": { + "External id": 984907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939813221.850, "dur": 3.063, + "args": { + "External id": 984908,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17808, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2338708, "tid": 2338708, + "ts": 6345939813327.298, "dur": 31025.692, + "args": { + "External id": 984909,"Record function id": 0, "Ev Idx": 17809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939813438.418, "dur": 6.805, + "args": { + "External id": 984910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939813448.787, "dur": 1.199, + "args": { + "External id": 984911,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939813451.628, "dur": 3.706, + "args": { + "External id": 984912,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939813456.907, "dur": 1.088, + "args": { + "External id": 984913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939813459.413, "dur": 1.063, + "args": { + "External id": 984914,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939813464.070, "dur": 0.763, + "args": { + "External id": 984915,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939813466.381, "dur": 0.861, + "args": { + "External id": 984916,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939813468.682, "dur": 2.885, + "args": { + "External id": 984917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939813472.910, "dur": 0.744, + "args": { + "External id": 984918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939813477.493, "dur": 0.946, + "args": { + "External id": 984919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939813497.730, "dur": 30796.938, + "args": { + "External id": 984920,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939813513.419, "dur": 30771.364, + "args": { + "External id": 984921,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939813530.428, "dur": 16.626, + "args": { + "External id": 984922,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939813550.964, "dur": 30690.772, + "args": { + "External id": 984923,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939813553.488, "dur": 30687.173, + "args": { + "External id": 984924,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939813560.068, "dur": 5.207, + "args": { + "External id": 984925,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939813566.995, "dur": 30669.070, + "args": { + "External id": 984926,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939844515.818, "dur": 35.585, + "args": { + "External id": 984927,"Sequence number": 10552504, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17827 + } + }, + { + "ph": "s", "id": 415, "pid": 2338708, "tid": 2338708, "ts": 6345939844515.818, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939844534.696, "dur": 11.905, + "args": { + "External id": 984928,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939844540.005, "dur": 6.398, + "args": { + "External id": 984929,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939844622.725, "dur": 74.648, + "args": { + "External id": 984930,"Record function id": 0, "Ev Idx": 17830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939844699.365, "dur": 1146.008, + "args": { + "External id": 984931,"Record function id": 0, "Ev Idx": 17831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939844737.803, "dur": 1093.410, + "args": { + "External id": 984932,"Sequence number": 10552505, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17832 + } + }, + { + "ph": "s", "id": 414, "pid": 2338708, "tid": 2338708, "ts": 6345939844737.803, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939844806.611, "dur": 49.920, + "args": { + "External id": 984933,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939844870.130, "dur": 105.464, + "args": { + "External id": 984934,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939844986.754, "dur": 60.746, + "args": { + "External id": 984935,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939845098.825, "dur": 36.660, + "args": { + "External id": 984936,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939845167.658, "dur": 29.673, + "args": { + "External id": 984937,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939845218.580, "dur": 19.729, + "args": { + "External id": 984938,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939845262.925, "dur": 140.302, + "args": { + "External id": 984939,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939845318.182, "dur": 12.460, + "args": { + "External id": 984940,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939845324.060, "dur": 5.666, + "args": { + "External id": 984941,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939845333.616, "dur": 4.411, + "args": { + "External id": 984942,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939845339.345, "dur": 0.832, + "args": { + "External id": 984943,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939845343.182, "dur": 6.457, + "args": { + "External id": 984944,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939845414.657, "dur": 50.576, + "args": { + "External id": 984945,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939845497.665, "dur": 33.924, + "args": { + "External id": 984946,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939845543.811, "dur": 42.926, + "args": { + "External id": 984947,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939845595.626, "dur": 34.607, + "args": { + "External id": 984948,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939845654.158, "dur": 27.521, + "args": { + "External id": 984949,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939845689.440, "dur": 35.255, + "args": { + "External id": 984950,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939845742.957, "dur": 17.553, + "args": { + "External id": 984951,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17851 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2338708, "tid": 2338708, + "ts": 6345939845913.305, "dur": 81.294, + "args": { + "External id": 984952,"Record function id": 0, "Ev Idx": 17852 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939846132.030, "dur": 50.223, + "args": { + "External id": 984953,"Record function id": 0, "Ev Idx": 17853 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2338708, "tid": 2338708, + "ts": 6345939846192.423, "dur": 31130.759, + "args": { + "External id": 984954,"Record function id": 0, "Ev Idx": 17854 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2338708, "tid": 2338708, + "ts": 6345939846199.478, "dur": 1059.400, + "args": { + "External id": 984955,"Record function id": 0, "Ev Idx": 17855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939846290.530, "dur": 9.919, + "args": { + "External id": 984956,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939846315.130, "dur": 40.049, + "args": { + "External id": 984957,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846321.197, "dur": 2.461, + "args": { + "External id": 984958,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846328.515, "dur": 0.406, + "args": { + "External id": 984959,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846330.666, "dur": 0.487, + "args": { + "External id": 984960,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846332.568, "dur": 0.483, + "args": { + "External id": 984961,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846335.787, "dur": 0.742, + "args": { + "External id": 984962,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846338.330, "dur": 0.552, + "args": { + "External id": 984963,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846340.132, "dur": 4.033, + "args": { + "External id": 984964,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846345.658, "dur": 0.299, + "args": { + "External id": 984965,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846347.288, "dur": 0.325, + "args": { + "External id": 984966,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939846368.327, "dur": 56.855, + "args": { + "External id": 984967,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939846458.707, "dur": 124.703, + "args": { + "External id": 984968,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939846470.256, "dur": 3.877, + "args": { + "External id": 984969,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939846479.697, "dur": 10.956, + "args": { + "External id": 984970,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939846484.460, "dur": 5.782, + "args": { + "External id": 984971,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846488.375, "dur": 0.684, + "args": { + "External id": 984972,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939846497.115, "dur": 31.650, + "args": { + "External id": 984973,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846499.226, "dur": 2.372, + "args": { + "External id": 984974,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846503.467, "dur": 0.624, + "args": { + "External id": 984975,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846505.155, "dur": 0.408, + "args": { + "External id": 984976,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846509.405, "dur": 2.545, + "args": { + "External id": 984977,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846513.149, "dur": 0.347, + "args": { + "External id": 984978,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846515.212, "dur": 0.330, + "args": { + "External id": 984979,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846518.066, "dur": 0.457, + "args": { + "External id": 984980,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846520.059, "dur": 0.396, + "args": { + "External id": 984981,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939846521.844, "dur": 2.072, + "args": { + "External id": 984982,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939846541.822, "dur": 34.356, + "args": { + "External id": 984983,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939846635.312, "dur": 508.053, + "args": { + "External id": 984984,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939846667.336, "dur": 469.892, + "args": { + "External id": 984985,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17885, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939846677.693, "dur": 452.991, + "args": { + "External id": 984986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939847172.522, "dur": 2.960, + "args": { + "External id": 984987,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17887, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2338708, "tid": 2338708, + "ts": 6345939847279.781, "dur": 29816.555, + "args": { + "External id": 984988,"Record function id": 0, "Ev Idx": 17888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939847388.802, "dur": 6.973, + "args": { + "External id": 984989,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939847399.498, "dur": 1.438, + "args": { + "External id": 984990,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939847403.066, "dur": 3.443, + "args": { + "External id": 984991,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939847408.143, "dur": 1.268, + "args": { + "External id": 984992,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939847410.780, "dur": 2.342, + "args": { + "External id": 984993,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939847414.358, "dur": 1.148, + "args": { + "External id": 984994,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939847419.398, "dur": 0.983, + "args": { + "External id": 984995,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939847422.005, "dur": 1.853, + "args": { + "External id": 984996,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939847425.242, "dur": 0.859, + "args": { + "External id": 984997,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939847427.736, "dur": 0.758, + "args": { + "External id": 984998,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939847455.542, "dur": 29545.889, + "args": { + "External id": 984999,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939847471.055, "dur": 29519.484, + "args": { + "External id": 985000,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939847486.070, "dur": 16.931, + "args": { + "External id": 985001,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939847507.009, "dur": 29440.066, + "args": { + "External id": 985002,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939847509.845, "dur": 29435.898, + "args": { + "External id": 985003,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939847515.946, "dur": 5.728, + "args": { + "External id": 985004,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939847523.587, "dur": 29417.828, + "args": { + "External id": 985005,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939877259.348, "dur": 38.165, + "args": { + "External id": 985006,"Sequence number": 10552506, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17906 + } + }, + { + "ph": "s", "id": 413, "pid": 2338708, "tid": 2338708, "ts": 6345939877259.348, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939877280.881, "dur": 11.689, + "args": { + "External id": 985007,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939877285.836, "dur": 6.475, + "args": { + "External id": 985008,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939877365.029, "dur": 74.523, + "args": { + "External id": 985009,"Record function id": 0, "Ev Idx": 17909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939877440.984, "dur": 1139.413, + "args": { + "External id": 985010,"Record function id": 0, "Ev Idx": 17910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939877482.569, "dur": 1084.325, + "args": { + "External id": 985011,"Sequence number": 10552507, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17911 + } + }, + { + "ph": "s", "id": 412, "pid": 2338708, "tid": 2338708, "ts": 6345939877482.569, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939877548.890, "dur": 52.860, + "args": { + "External id": 985012,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939877615.377, "dur": 105.174, + "args": { + "External id": 985013,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939877733.076, "dur": 39.511, + "args": { + "External id": 985014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939877782.146, "dur": 29.081, + "args": { + "External id": 985015,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939877840.965, "dur": 26.001, + "args": { + "External id": 985016,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939877886.972, "dur": 16.755, + "args": { + "External id": 985017,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939877926.172, "dur": 196.668, + "args": { + "External id": 985018,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939877979.343, "dur": 11.469, + "args": { + "External id": 985019,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939877984.623, "dur": 5.406, + "args": { + "External id": 985020,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939877993.745, "dur": 4.342, + "args": { + "External id": 985021,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939877999.499, "dur": 1.037, + "args": { + "External id": 985022,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939878003.222, "dur": 26.984, + "args": { + "External id": 985023,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939878137.545, "dur": 57.798, + "args": { + "External id": 985024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939878232.173, "dur": 31.650, + "args": { + "External id": 985025,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939878274.800, "dur": 41.599, + "args": { + "External id": 985026,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939878328.893, "dur": 34.604, + "args": { + "External id": 985027,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 17927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939878386.994, "dur": 29.135, + "args": { + "External id": 985028,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 17928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939878424.054, "dur": 34.713, + "args": { + "External id": 985029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 17929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939878477.392, "dur": 17.531, + "args": { + "External id": 985030,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 17930 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2338708, "tid": 2338708, + "ts": 6345939878647.467, "dur": 83.147, + "args": { + "External id": 985031,"Record function id": 0, "Ev Idx": 17931 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939878806.617, "dur": 47.112, + "args": { + "External id": 985032,"Record function id": 0, "Ev Idx": 17932 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2338708, "tid": 2338708, + "ts": 6345939878862.723, "dur": 31023.464, + "args": { + "External id": 985033,"Record function id": 0, "Ev Idx": 17933 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2338708, "tid": 2338708, + "ts": 6345939878871.574, "dur": 904.947, + "args": { + "External id": 985034,"Record function id": 0, "Ev Idx": 17934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939878960.928, "dur": 8.627, + "args": { + "External id": 985035,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939878982.645, "dur": 57.750, + "args": { + "External id": 985036,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939878988.755, "dur": 2.225, + "args": { + "External id": 985037,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939878995.665, "dur": 0.438, + "args": { + "External id": 985038,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939878997.632, "dur": 0.476, + "args": { + "External id": 985039,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939878999.348, "dur": 0.545, + "args": { + "External id": 985040,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879002.755, "dur": 0.375, + "args": { + "External id": 985041,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879004.872, "dur": 0.465, + "args": { + "External id": 985042,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879006.354, "dur": 22.501, + "args": { + "External id": 985043,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879031.407, "dur": 0.526, + "args": { + "External id": 985044,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879033.324, "dur": 0.506, + "args": { + "External id": 985045,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939879088.169, "dur": 58.760, + "args": { + "External id": 985046,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939879189.309, "dur": 128.861, + "args": { + "External id": 985047,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 17947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939879202.145, "dur": 7.137, + "args": { + "External id": 985048,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939879215.508, "dur": 11.454, + "args": { + "External id": 985049,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939879220.232, "dur": 6.323, + "args": { + "External id": 985050,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 17950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879224.001, "dur": 0.920, + "args": { + "External id": 985051,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 17951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939879234.419, "dur": 29.783, + "args": { + "External id": 985052,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 17952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879236.956, "dur": 0.520, + "args": { + "External id": 985053,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879239.283, "dur": 2.073, + "args": { + "External id": 985054,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879242.254, "dur": 0.570, + "args": { + "External id": 985055,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879244.220, "dur": 2.679, + "args": { + "External id": 985056,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879249.521, "dur": 0.470, + "args": { + "External id": 985057,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879251.195, "dur": 0.497, + "args": { + "External id": 985058,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879253.059, "dur": 0.410, + "args": { + "External id": 985059,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879257.125, "dur": 0.531, + "args": { + "External id": 985060,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939879259.139, "dur": 0.273, + "args": { + "External id": 985061,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 17961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939879276.829, "dur": 33.323, + "args": { + "External id": 985062,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 17962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939879376.380, "dur": 303.734, + "args": { + "External id": 985063,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 17963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939879408.519, "dur": 266.802, + "args": { + "External id": 985064,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 17964, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939879419.048, "dur": 250.580, + "args": { + "External id": 985065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 17965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939879706.546, "dur": 2.406, + "args": { + "External id": 985066,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 17966, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2338708, "tid": 2338708, + "ts": 6345939879797.275, "dur": 29854.127, + "args": { + "External id": 985067,"Record function id": 0, "Ev Idx": 17967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939879901.095, "dur": 5.628, + "args": { + "External id": 985068,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 17968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939879910.255, "dur": 0.856, + "args": { + "External id": 985069,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939879912.738, "dur": 2.954, + "args": { + "External id": 985070,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939879917.201, "dur": 0.930, + "args": { + "External id": 985071,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939879919.603, "dur": 1.188, + "args": { + "External id": 985072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 17972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939879922.172, "dur": 1.100, + "args": { + "External id": 985073,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 17973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939879924.606, "dur": 1.165, + "args": { + "External id": 985074,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 17974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939879927.658, "dur": 2.373, + "args": { + "External id": 985075,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939879931.442, "dur": 1.143, + "args": { + "External id": 985076,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939879936.581, "dur": 0.693, + "args": { + "External id": 985077,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 17977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939879954.985, "dur": 29633.595, + "args": { + "External id": 985078,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939879969.428, "dur": 29607.932, + "args": { + "External id": 985079,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 17979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939879984.238, "dur": 16.214, + "args": { + "External id": 985080,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939880003.999, "dur": 29529.908, + "args": { + "External id": 985081,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 17981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939880006.617, "dur": 29526.286, + "args": { + "External id": 985082,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 17982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939880033.705, "dur": 6.806, + "args": { + "External id": 985083,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939880042.408, "dur": 29485.840, + "args": { + "External id": 985084,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 17984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939909820.577, "dur": 37.992, + "args": { + "External id": 985085,"Sequence number": 10552508, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 17985 + } + }, + { + "ph": "s", "id": 411, "pid": 2338708, "tid": 2338708, "ts": 6345939909820.577, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939909841.936, "dur": 11.674, + "args": { + "External id": 985086,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 17986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939909847.047, "dur": 6.344, + "args": { + "External id": 985087,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 17987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939909928.407, "dur": 74.839, + "args": { + "External id": 985088,"Record function id": 0, "Ev Idx": 17988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939910037.767, "dur": 1185.629, + "args": { + "External id": 985089,"Record function id": 0, "Ev Idx": 17989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939910112.909, "dur": 1096.132, + "args": { + "External id": 985090,"Sequence number": 10552509, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 17990 + } + }, + { + "ph": "s", "id": 410, "pid": 2338708, "tid": 2338708, "ts": 6345939910112.909, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939910189.397, "dur": 56.545, + "args": { + "External id": 985091,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 17991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939910259.544, "dur": 109.197, + "args": { + "External id": 985092,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 17992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939910384.720, "dur": 36.424, + "args": { + "External id": 985093,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939910427.905, "dur": 29.913, + "args": { + "External id": 985094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 17994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939910486.573, "dur": 27.095, + "args": { + "External id": 985095,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939910534.814, "dur": 18.499, + "args": { + "External id": 985096,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 17996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939910577.929, "dur": 141.002, + "args": { + "External id": 985097,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 17997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939910633.147, "dur": 11.996, + "args": { + "External id": 985098,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 17998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939910638.921, "dur": 5.444, + "args": { + "External id": 985099,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 17999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939910647.906, "dur": 4.367, + "args": { + "External id": 985100,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939910653.643, "dur": 1.116, + "args": { + "External id": 985101,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939910658.963, "dur": 6.344, + "args": { + "External id": 985102,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939910730.700, "dur": 46.009, + "args": { + "External id": 985103,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939910808.774, "dur": 33.138, + "args": { + "External id": 985104,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939910852.590, "dur": 42.695, + "args": { + "External id": 985105,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939910904.583, "dur": 35.035, + "args": { + "External id": 985106,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939910961.524, "dur": 33.018, + "args": { + "External id": 985107,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939911000.330, "dur": 89.216, + "args": { + "External id": 985108,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939911114.903, "dur": 22.440, + "args": { + "External id": 985109,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18009 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2338708, "tid": 2338708, + "ts": 6345939911291.819, "dur": 82.773, + "args": { + "External id": 985110,"Record function id": 0, "Ev Idx": 18010 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939911452.989, "dur": 45.515, + "args": { + "External id": 985111,"Record function id": 0, "Ev Idx": 18011 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2338708, "tid": 2338708, + "ts": 6345939911507.832, "dur": 32226.486, + "args": { + "External id": 985112,"Record function id": 0, "Ev Idx": 18012 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2338708, "tid": 2338708, + "ts": 6345939911516.141, "dur": 984.557, + "args": { + "External id": 985113,"Record function id": 0, "Ev Idx": 18013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939911603.081, "dur": 9.035, + "args": { + "External id": 985114,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939911625.115, "dur": 41.273, + "args": { + "External id": 985115,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911630.785, "dur": 2.511, + "args": { + "External id": 985116,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911638.419, "dur": 0.611, + "args": { + "External id": 985117,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911640.615, "dur": 0.602, + "args": { + "External id": 985118,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911643.124, "dur": 0.392, + "args": { + "External id": 985119,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911646.445, "dur": 0.647, + "args": { + "External id": 985120,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911648.336, "dur": 0.633, + "args": { + "External id": 985121,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911650.316, "dur": 5.281, + "args": { + "External id": 985122,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911657.283, "dur": 0.351, + "args": { + "External id": 985123,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911659.035, "dur": 0.380, + "args": { + "External id": 985124,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939911680.023, "dur": 50.909, + "args": { + "External id": 985125,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939911763.169, "dur": 134.480, + "args": { + "External id": 985126,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939911773.761, "dur": 3.960, + "args": { + "External id": 985127,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939911790.359, "dur": 11.096, + "args": { + "External id": 985128,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939911795.002, "dur": 6.046, + "args": { + "External id": 985129,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911798.918, "dur": 0.799, + "args": { + "External id": 985130,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939911808.134, "dur": 31.704, + "args": { + "External id": 985131,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911810.128, "dur": 2.327, + "args": { + "External id": 985132,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911814.083, "dur": 0.550, + "args": { + "External id": 985133,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911815.756, "dur": 0.507, + "args": { + "External id": 985134,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911819.683, "dur": 2.911, + "args": { + "External id": 985135,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911823.981, "dur": 0.521, + "args": { + "External id": 985136,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911825.838, "dur": 0.429, + "args": { + "External id": 985137,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911829.335, "dur": 0.253, + "args": { + "External id": 985138,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911831.026, "dur": 0.401, + "args": { + "External id": 985139,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939911832.269, "dur": 1.923, + "args": { + "External id": 985140,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939911857.175, "dur": 32.873, + "args": { + "External id": 985141,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939911953.505, "dur": 434.857, + "args": { + "External id": 985142,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939911985.549, "dur": 396.773, + "args": { + "External id": 985143,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18043, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939911996.866, "dur": 376.146, + "args": { + "External id": 985144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939912418.850, "dur": 2.518, + "args": { + "External id": 985145,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18045, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2338708, "tid": 2338708, + "ts": 6345939912522.293, "dur": 30981.639, + "args": { + "External id": 985146,"Record function id": 0, "Ev Idx": 18046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939912634.122, "dur": 6.921, + "args": { + "External id": 985147,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939912644.598, "dur": 0.872, + "args": { + "External id": 985148,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939912647.245, "dur": 3.933, + "args": { + "External id": 985149,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939912652.850, "dur": 0.968, + "args": { + "External id": 985150,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939912655.384, "dur": 0.829, + "args": { + "External id": 985151,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939912657.458, "dur": 0.849, + "args": { + "External id": 985152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939912659.911, "dur": 0.850, + "args": { + "External id": 985153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939912662.458, "dur": 2.280, + "args": { + "External id": 985154,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939912666.392, "dur": 0.976, + "args": { + "External id": 985155,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939912670.908, "dur": 1.079, + "args": { + "External id": 985156,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939912690.694, "dur": 30748.955, + "args": { + "External id": 985157,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939912706.296, "dur": 30720.846, + "args": { + "External id": 985158,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939912728.909, "dur": 16.339, + "args": { + "External id": 985159,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939912748.858, "dur": 30635.436, + "args": { + "External id": 985160,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939912751.378, "dur": 30631.668, + "args": { + "External id": 985161,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939912757.504, "dur": 5.738, + "args": { + "External id": 985162,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939912764.850, "dur": 30613.378, + "args": { + "External id": 985163,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939943669.745, "dur": 37.740, + "args": { + "External id": 985164,"Sequence number": 10552510, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18064 + } + }, + { + "ph": "s", "id": 409, "pid": 2338708, "tid": 2338708, "ts": 6345939943669.745, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939943688.396, "dur": 14.252, + "args": { + "External id": 985165,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939943695.734, "dur": 6.662, + "args": { + "External id": 985166,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939943776.170, "dur": 73.573, + "args": { + "External id": 985167,"Record function id": 0, "Ev Idx": 18067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939943851.368, "dur": 1188.509, + "args": { + "External id": 985168,"Record function id": 0, "Ev Idx": 18068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939943890.368, "dur": 1115.036, + "args": { + "External id": 985169,"Sequence number": 10552511, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18069 + } + }, + { + "ph": "s", "id": 408, "pid": 2338708, "tid": 2338708, "ts": 6345939943890.368, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939943955.622, "dur": 67.679, + "args": { + "External id": 985170,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939944046.650, "dur": 144.035, + "args": { + "External id": 985171,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939944212.377, "dur": 39.926, + "args": { + "External id": 985172,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939944261.842, "dur": 30.116, + "args": { + "External id": 985173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939944329.798, "dur": 35.945, + "args": { + "External id": 985174,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939944388.500, "dur": 18.905, + "args": { + "External id": 985175,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939944434.142, "dur": 139.747, + "args": { + "External id": 985176,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939944489.048, "dur": 12.021, + "args": { + "External id": 985177,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939944494.549, "dur": 5.552, + "args": { + "External id": 985178,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939944503.819, "dur": 4.538, + "args": { + "External id": 985179,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939944509.500, "dur": 1.154, + "args": { + "External id": 985180,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939944513.247, "dur": 6.175, + "args": { + "External id": 985181,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939944585.325, "dur": 49.091, + "args": { + "External id": 985182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939944665.390, "dur": 31.971, + "args": { + "External id": 985183,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939944706.903, "dur": 41.460, + "args": { + "External id": 985184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939944759.371, "dur": 34.527, + "args": { + "External id": 985185,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939944818.319, "dur": 30.337, + "args": { + "External id": 985186,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939944856.718, "dur": 36.439, + "args": { + "External id": 985187,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939944911.806, "dur": 19.446, + "args": { + "External id": 985188,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18088 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2338708, "tid": 2338708, + "ts": 6345939945152.640, "dur": 105.914, + "args": { + "External id": 985189,"Record function id": 0, "Ev Idx": 18089 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939945352.154, "dur": 53.027, + "args": { + "External id": 985190,"Record function id": 0, "Ev Idx": 18090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2338708, "tid": 2338708, + "ts": 6345939945415.392, "dur": 31433.963, + "args": { + "External id": 985191,"Record function id": 0, "Ev Idx": 18091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2338708, "tid": 2338708, + "ts": 6345939945424.928, "dur": 983.427, + "args": { + "External id": 985192,"Record function id": 0, "Ev Idx": 18092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939945519.843, "dur": 11.240, + "args": { + "External id": 985193,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939945545.484, "dur": 40.508, + "args": { + "External id": 985194,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945551.223, "dur": 2.458, + "args": { + "External id": 985195,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945558.736, "dur": 0.641, + "args": { + "External id": 985196,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945560.518, "dur": 0.621, + "args": { + "External id": 985197,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945562.774, "dur": 0.472, + "args": { + "External id": 985198,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945566.579, "dur": 0.357, + "args": { + "External id": 985199,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945568.725, "dur": 0.498, + "args": { + "External id": 985200,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945570.312, "dur": 4.525, + "args": { + "External id": 985201,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945576.948, "dur": 0.355, + "args": { + "External id": 985202,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945578.132, "dur": 0.430, + "args": { + "External id": 985203,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939945600.384, "dur": 61.627, + "args": { + "External id": 985204,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939945696.519, "dur": 130.044, + "args": { + "External id": 985205,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939945708.683, "dur": 4.613, + "args": { + "External id": 985206,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939945718.849, "dur": 13.314, + "args": { + "External id": 985207,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939945723.439, "dur": 8.300, + "args": { + "External id": 985208,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945729.826, "dur": 0.705, + "args": { + "External id": 985209,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939945739.053, "dur": 31.716, + "args": { + "External id": 985210,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945741.221, "dur": 2.723, + "args": { + "External id": 985211,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945745.239, "dur": 0.547, + "args": { + "External id": 985212,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945746.998, "dur": 0.609, + "args": { + "External id": 985213,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945750.799, "dur": 2.997, + "args": { + "External id": 985214,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945754.913, "dur": 0.371, + "args": { + "External id": 985215,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945756.975, "dur": 0.513, + "args": { + "External id": 985216,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945760.554, "dur": 0.570, + "args": { + "External id": 985217,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945762.826, "dur": 0.453, + "args": { + "External id": 985218,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939945764.008, "dur": 2.207, + "args": { + "External id": 985219,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939945782.707, "dur": 34.947, + "args": { + "External id": 985220,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939945883.164, "dur": 414.561, + "args": { + "External id": 985221,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939945914.586, "dur": 377.160, + "args": { + "External id": 985222,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18122, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939945925.147, "dur": 360.161, + "args": { + "External id": 985223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939946326.641, "dur": 2.651, + "args": { + "External id": 985224,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18124, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2338708, "tid": 2338708, + "ts": 6345939946429.975, "dur": 30181.719, + "args": { + "External id": 985225,"Record function id": 0, "Ev Idx": 18125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939946539.083, "dur": 6.819, + "args": { + "External id": 985226,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939946550.053, "dur": 1.659, + "args": { + "External id": 985227,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939946553.542, "dur": 3.629, + "args": { + "External id": 985228,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939946558.646, "dur": 1.091, + "args": { + "External id": 985229,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939946561.327, "dur": 0.981, + "args": { + "External id": 985230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939946563.574, "dur": 0.819, + "args": { + "External id": 985231,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939946565.832, "dur": 0.940, + "args": { + "External id": 985232,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939946568.268, "dur": 2.418, + "args": { + "External id": 985233,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939946571.869, "dur": 0.874, + "args": { + "External id": 985234,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939946576.414, "dur": 0.686, + "args": { + "External id": 985235,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939946595.894, "dur": 29948.050, + "args": { + "External id": 985236,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939946610.817, "dur": 29920.464, + "args": { + "External id": 985237,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939946628.811, "dur": 17.960, + "args": { + "External id": 985238,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939946650.516, "dur": 29833.919, + "args": { + "External id": 985239,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939946653.068, "dur": 29830.220, + "args": { + "External id": 985240,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939946659.563, "dur": 5.579, + "args": { + "External id": 985241,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939946666.776, "dur": 29811.458, + "args": { + "External id": 985242,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939976782.789, "dur": 37.405, + "args": { + "External id": 985243,"Sequence number": 10552512, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18143 + } + }, + { + "ph": "s", "id": 407, "pid": 2338708, "tid": 2338708, "ts": 6345939976782.789, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345939976803.058, "dur": 12.420, + "args": { + "External id": 985244,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939976808.664, "dur": 6.540, + "args": { + "External id": 985245,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345939976891.385, "dur": 74.679, + "args": { + "External id": 985246,"Record function id": 0, "Ev Idx": 18146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345939976967.350, "dur": 1181.582, + "args": { + "External id": 985247,"Record function id": 0, "Ev Idx": 18147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345939977006.129, "dur": 1126.299, + "args": { + "External id": 985248,"Sequence number": 10552513, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18148 + } + }, + { + "ph": "s", "id": 406, "pid": 2338708, "tid": 2338708, "ts": 6345939977006.129, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939977129.425, "dur": 55.509, + "args": { + "External id": 985249,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939977199.309, "dur": 105.838, + "args": { + "External id": 985250,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939977317.138, "dur": 40.205, + "args": { + "External id": 985251,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939977366.722, "dur": 28.835, + "args": { + "External id": 985252,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939977425.355, "dur": 27.361, + "args": { + "External id": 985253,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345939977474.322, "dur": 20.641, + "args": { + "External id": 985254,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345939977517.834, "dur": 135.103, + "args": { + "External id": 985255,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345939977569.624, "dur": 12.272, + "args": { + "External id": 985256,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939977575.392, "dur": 5.448, + "args": { + "External id": 985257,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939977584.537, "dur": 4.020, + "args": { + "External id": 985258,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939977589.634, "dur": 1.419, + "args": { + "External id": 985259,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939977593.700, "dur": 6.370, + "args": { + "External id": 985260,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939977663.574, "dur": 46.185, + "args": { + "External id": 985261,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345939977741.997, "dur": 31.832, + "args": { + "External id": 985262,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939977783.653, "dur": 42.771, + "args": { + "External id": 985263,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939977837.957, "dur": 34.846, + "args": { + "External id": 985264,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345939977894.910, "dur": 28.986, + "args": { + "External id": 985265,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345939977931.644, "dur": 36.344, + "args": { + "External id": 985266,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345939977988.198, "dur": 18.847, + "args": { + "External id": 985267,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18167 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2338708, "tid": 2338708, + "ts": 6345939978220.181, "dur": 85.159, + "args": { + "External id": 985268,"Record function id": 0, "Ev Idx": 18168 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345939978383.026, "dur": 47.185, + "args": { + "External id": 985269,"Record function id": 0, "Ev Idx": 18169 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2338708, "tid": 2338708, + "ts": 6345939978439.789, "dur": 31411.362, + "args": { + "External id": 985270,"Record function id": 0, "Ev Idx": 18170 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2338708, "tid": 2338708, + "ts": 6345939978447.072, "dur": 963.288, + "args": { + "External id": 985271,"Record function id": 0, "Ev Idx": 18171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939978535.943, "dur": 9.453, + "args": { + "External id": 985272,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939978558.732, "dur": 35.905, + "args": { + "External id": 985273,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978564.390, "dur": 2.445, + "args": { + "External id": 985274,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978571.339, "dur": 0.497, + "args": { + "External id": 985275,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978573.103, "dur": 0.558, + "args": { + "External id": 985276,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978575.395, "dur": 0.331, + "args": { + "External id": 985277,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978578.175, "dur": 0.408, + "args": { + "External id": 985278,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978580.140, "dur": 0.607, + "args": { + "External id": 985279,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978581.639, "dur": 2.827, + "args": { + "External id": 985280,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978585.999, "dur": 0.364, + "args": { + "External id": 985281,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978587.532, "dur": 0.515, + "args": { + "External id": 985282,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939978607.246, "dur": 60.552, + "args": { + "External id": 985283,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345939978701.354, "dur": 126.306, + "args": { + "External id": 985284,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939978711.860, "dur": 3.899, + "args": { + "External id": 985285,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345939978724.240, "dur": 10.449, + "args": { + "External id": 985286,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345939978728.833, "dur": 5.440, + "args": { + "External id": 985287,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978732.417, "dur": 0.539, + "args": { + "External id": 985288,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345939978741.609, "dur": 32.034, + "args": { + "External id": 985289,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978743.335, "dur": 2.329, + "args": { + "External id": 985290,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978747.917, "dur": 0.546, + "args": { + "External id": 985291,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978749.577, "dur": 0.517, + "args": { + "External id": 985292,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978753.810, "dur": 2.451, + "args": { + "External id": 985293,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978757.188, "dur": 0.239, + "args": { + "External id": 985294,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978758.990, "dur": 0.447, + "args": { + "External id": 985295,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978762.612, "dur": 0.395, + "args": { + "External id": 985296,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978764.723, "dur": 0.345, + "args": { + "External id": 985297,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939978766.504, "dur": 2.388, + "args": { + "External id": 985298,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939978786.997, "dur": 32.763, + "args": { + "External id": 985299,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345939978881.746, "dur": 419.308, + "args": { + "External id": 985300,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939978913.876, "dur": 381.360, + "args": { + "External id": 985301,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18201, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345939978923.920, "dur": 365.288, + "args": { + "External id": 985302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345939979330.681, "dur": 2.584, + "args": { + "External id": 985303,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18203, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2338708, "tid": 2338708, + "ts": 6345939979431.115, "dur": 30191.653, + "args": { + "External id": 985304,"Record function id": 0, "Ev Idx": 18204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939979539.276, "dur": 6.747, + "args": { + "External id": 985305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939979549.495, "dur": 1.310, + "args": { + "External id": 985306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939979552.378, "dur": 3.051, + "args": { + "External id": 985307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939979556.986, "dur": 0.702, + "args": { + "External id": 985308,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939979558.878, "dur": 0.738, + "args": { + "External id": 985309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939979560.769, "dur": 0.836, + "args": { + "External id": 985310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939979563.055, "dur": 0.957, + "args": { + "External id": 985311,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939979565.750, "dur": 2.579, + "args": { + "External id": 985312,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939979569.515, "dur": 0.743, + "args": { + "External id": 985313,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345939979573.813, "dur": 0.763, + "args": { + "External id": 985314,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939979593.561, "dur": 29966.644, + "args": { + "External id": 985315,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939979609.594, "dur": 29938.599, + "args": { + "External id": 985316,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345939979632.185, "dur": 16.620, + "args": { + "External id": 985317,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345939979652.581, "dur": 29849.252, + "args": { + "External id": 985318,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345939979655.168, "dur": 29845.641, + "args": { + "External id": 985319,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345939979660.463, "dur": 6.226, + "args": { + "External id": 985320,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345939979668.432, "dur": 29827.581, + "args": { + "External id": 985321,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940009787.251, "dur": 37.351, + "args": { + "External id": 985322,"Sequence number": 10552514, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18222 + } + }, + { + "ph": "s", "id": 405, "pid": 2338708, "tid": 2338708, "ts": 6345940009787.251, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345940009807.202, "dur": 12.333, + "args": { + "External id": 985323,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940009812.644, "dur": 6.675, + "args": { + "External id": 985324,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345940009892.143, "dur": 74.993, + "args": { + "External id": 985325,"Record function id": 0, "Ev Idx": 18225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345940009968.656, "dur": 1175.018, + "args": { + "External id": 985326,"Record function id": 0, "Ev Idx": 18226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940010029.167, "dur": 1099.851, + "args": { + "External id": 985327,"Sequence number": 10552515, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18227 + } + }, + { + "ph": "s", "id": 404, "pid": 2338708, "tid": 2338708, "ts": 6345940010029.167, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345940010137.621, "dur": 56.596, + "args": { + "External id": 985328,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940010207.519, "dur": 105.950, + "args": { + "External id": 985329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940010326.184, "dur": 37.926, + "args": { + "External id": 985330,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940010373.004, "dur": 29.592, + "args": { + "External id": 985331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345940010429.795, "dur": 25.838, + "args": { + "External id": 985332,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345940010477.664, "dur": 17.411, + "args": { + "External id": 985333,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345940010519.063, "dur": 136.305, + "args": { + "External id": 985334,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940010573.466, "dur": 12.207, + "args": { + "External id": 985335,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940010579.838, "dur": 4.973, + "args": { + "External id": 985336,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940010588.429, "dur": 4.274, + "args": { + "External id": 985337,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940010593.812, "dur": 1.203, + "args": { + "External id": 985338,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940010597.514, "dur": 5.717, + "args": { + "External id": 985339,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940010666.178, "dur": 45.951, + "args": { + "External id": 985340,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345940010746.143, "dur": 30.875, + "args": { + "External id": 985341,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940010787.098, "dur": 40.642, + "args": { + "External id": 985342,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940010839.351, "dur": 34.103, + "args": { + "External id": 985343,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345940010895.542, "dur": 25.213, + "args": { + "External id": 985344,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940010927.959, "dur": 35.638, + "args": { + "External id": 985345,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345940010982.462, "dur": 17.508, + "args": { + "External id": 985346,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18246 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2338708, "tid": 2338708, + "ts": 6345940011217.103, "dur": 83.156, + "args": { + "External id": 985347,"Record function id": 0, "Ev Idx": 18247 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345940011378.776, "dur": 46.956, + "args": { + "External id": 985348,"Record function id": 0, "Ev Idx": 18248 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2338708, "tid": 2338708, + "ts": 6345940011434.867, "dur": 32515.689, + "args": { + "External id": 985349,"Record function id": 0, "Ev Idx": 18249 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2338708, "tid": 2338708, + "ts": 6345940011442.245, "dur": 1017.950, + "args": { + "External id": 985350,"Record function id": 0, "Ev Idx": 18250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940011531.716, "dur": 9.133, + "args": { + "External id": 985351,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345940011554.362, "dur": 37.450, + "args": { + "External id": 985352,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011559.889, "dur": 2.404, + "args": { + "External id": 985353,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011567.053, "dur": 0.492, + "args": { + "External id": 985354,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011569.000, "dur": 0.438, + "args": { + "External id": 985355,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011570.904, "dur": 0.774, + "args": { + "External id": 985356,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011574.562, "dur": 0.678, + "args": { + "External id": 985357,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011576.472, "dur": 0.622, + "args": { + "External id": 985358,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011578.138, "dur": 3.707, + "args": { + "External id": 985359,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011583.274, "dur": 0.380, + "args": { + "External id": 985360,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011584.605, "dur": 0.479, + "args": { + "External id": 985361,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940011605.103, "dur": 56.036, + "args": { + "External id": 985362,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345940011693.161, "dur": 121.454, + "args": { + "External id": 985363,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940011704.348, "dur": 4.017, + "args": { + "External id": 985364,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345940011714.055, "dur": 10.293, + "args": { + "External id": 985365,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940011718.636, "dur": 5.308, + "args": { + "External id": 985366,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011722.480, "dur": 0.434, + "args": { + "External id": 985367,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345940011731.211, "dur": 31.331, + "args": { + "External id": 985368,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011733.154, "dur": 1.805, + "args": { + "External id": 985369,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011736.594, "dur": 0.345, + "args": { + "External id": 985370,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011738.296, "dur": 0.544, + "args": { + "External id": 985371,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011741.948, "dur": 2.795, + "args": { + "External id": 985372,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011747.538, "dur": 0.591, + "args": { + "External id": 985373,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011749.560, "dur": 0.533, + "args": { + "External id": 985374,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011752.204, "dur": 0.304, + "args": { + "External id": 985375,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011754.274, "dur": 0.596, + "args": { + "External id": 985376,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940011755.976, "dur": 2.127, + "args": { + "External id": 985377,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940011773.181, "dur": 33.687, + "args": { + "External id": 985378,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345940011867.780, "dur": 479.182, + "args": { + "External id": 985379,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345940011901.001, "dur": 439.562, + "args": { + "External id": 985380,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18280, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345940011911.451, "dur": 422.157, + "args": { + "External id": 985381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345940012378.455, "dur": 2.397, + "args": { + "External id": 985382,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18282, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2338708, "tid": 2338708, + "ts": 6345940012481.256, "dur": 31210.360, + "args": { + "External id": 985383,"Record function id": 0, "Ev Idx": 18283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940012593.161, "dur": 7.297, + "args": { + "External id": 985384,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940012603.925, "dur": 0.952, + "args": { + "External id": 985385,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940012606.371, "dur": 3.913, + "args": { + "External id": 985386,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940012612.038, "dur": 0.601, + "args": { + "External id": 985387,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940012613.897, "dur": 0.947, + "args": { + "External id": 985388,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940012616.069, "dur": 1.126, + "args": { + "External id": 985389,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940012618.589, "dur": 1.043, + "args": { + "External id": 985390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940012621.326, "dur": 2.231, + "args": { + "External id": 985391,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940012624.685, "dur": 0.961, + "args": { + "External id": 985392,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940012629.054, "dur": 0.876, + "args": { + "External id": 985393,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940012648.437, "dur": 30978.215, + "args": { + "External id": 985394,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940012663.827, "dur": 30951.232, + "args": { + "External id": 985395,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940012682.290, "dur": 16.280, + "args": { + "External id": 985396,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940012701.771, "dur": 30869.015, + "args": { + "External id": 985397,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940012704.264, "dur": 30865.440, + "args": { + "External id": 985398,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940012710.272, "dur": 5.730, + "args": { + "External id": 985399,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940012717.881, "dur": 30846.988, + "args": { + "External id": 985400,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940043880.180, "dur": 39.773, + "args": { + "External id": 985401,"Sequence number": 10552516, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18301 + } + }, + { + "ph": "s", "id": 403, "pid": 2338708, "tid": 2338708, "ts": 6345940043880.180, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345940043902.861, "dur": 12.121, + "args": { + "External id": 985402,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940043908.098, "dur": 6.636, + "args": { + "External id": 985403,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345940043993.105, "dur": 118.619, + "args": { + "External id": 985404,"Record function id": 0, "Ev Idx": 18304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345940044114.802, "dur": 1237.327, + "args": { + "External id": 985405,"Record function id": 0, "Ev Idx": 18305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940044157.843, "dur": 1178.812, + "args": { + "External id": 985406,"Sequence number": 10552517, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18306 + } + }, + { + "ph": "s", "id": 402, "pid": 2338708, "tid": 2338708, "ts": 6345940044157.843, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345940044232.434, "dur": 56.398, + "args": { + "External id": 985407,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940044302.545, "dur": 109.586, + "args": { + "External id": 985408,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940044424.597, "dur": 37.152, + "args": { + "External id": 985409,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940044470.836, "dur": 28.954, + "args": { + "External id": 985410,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345940044528.635, "dur": 26.976, + "args": { + "External id": 985411,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345940044578.579, "dur": 17.660, + "args": { + "External id": 985412,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345940044620.827, "dur": 136.929, + "args": { + "External id": 985413,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940044675.222, "dur": 12.388, + "args": { + "External id": 985414,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940044681.192, "dur": 5.671, + "args": { + "External id": 985415,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940044691.016, "dur": 3.943, + "args": { + "External id": 985416,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940044696.007, "dur": 0.931, + "args": { + "External id": 985417,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940044699.842, "dur": 5.760, + "args": { + "External id": 985418,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940044768.949, "dur": 46.532, + "args": { + "External id": 985419,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345940044858.610, "dur": 41.254, + "args": { + "External id": 985420,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940044914.703, "dur": 62.248, + "args": { + "External id": 985421,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940044994.036, "dur": 103.907, + "args": { + "External id": 985422,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345940045132.250, "dur": 35.107, + "args": { + "External id": 985423,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940045176.398, "dur": 42.705, + "args": { + "External id": 985424,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345940045241.501, "dur": 20.417, + "args": { + "External id": 985425,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18325 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2338708, "tid": 2338708, + "ts": 6345940045424.902, "dur": 83.223, + "args": { + "External id": 985426,"Record function id": 0, "Ev Idx": 18326 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345940045590.205, "dur": 45.721, + "args": { + "External id": 985427,"Record function id": 0, "Ev Idx": 18327 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2338708, "tid": 2338708, + "ts": 6345940045645.665, "dur": 31302.442, + "args": { + "External id": 985428,"Record function id": 0, "Ev Idx": 18328 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2338708, "tid": 2338708, + "ts": 6345940045653.192, "dur": 973.569, + "args": { + "External id": 985429,"Record function id": 0, "Ev Idx": 18329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940045743.150, "dur": 9.586, + "args": { + "External id": 985430,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345940045766.532, "dur": 39.682, + "args": { + "External id": 985431,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045772.423, "dur": 2.492, + "args": { + "External id": 985432,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045779.420, "dur": 0.302, + "args": { + "External id": 985433,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045780.920, "dur": 0.345, + "args": { + "External id": 985434,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045782.748, "dur": 0.793, + "args": { + "External id": 985435,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045786.301, "dur": 0.531, + "args": { + "External id": 985436,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045788.412, "dur": 0.597, + "args": { + "External id": 985437,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045793.199, "dur": 3.833, + "args": { + "External id": 985438,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045798.197, "dur": 0.397, + "args": { + "External id": 985439,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045799.567, "dur": 0.445, + "args": { + "External id": 985440,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940045819.479, "dur": 55.050, + "args": { + "External id": 985441,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345940045907.962, "dur": 188.330, + "args": { + "External id": 985442,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940045919.329, "dur": 4.145, + "args": { + "External id": 985443,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345940045929.344, "dur": 10.663, + "args": { + "External id": 985444,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940045933.881, "dur": 5.710, + "args": { + "External id": 985445,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045937.557, "dur": 0.902, + "args": { + "External id": 985446,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345940045946.473, "dur": 31.441, + "args": { + "External id": 985447,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045948.582, "dur": 1.888, + "args": { + "External id": 985448,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045952.257, "dur": 0.536, + "args": { + "External id": 985449,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045953.837, "dur": 0.549, + "args": { + "External id": 985450,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045957.578, "dur": 2.423, + "args": { + "External id": 985451,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045961.126, "dur": 0.565, + "args": { + "External id": 985452,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045963.163, "dur": 0.547, + "args": { + "External id": 985453,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045966.519, "dur": 0.636, + "args": { + "External id": 985454,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045969.021, "dur": 0.320, + "args": { + "External id": 985455,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940045971.161, "dur": 2.721, + "args": { + "External id": 985456,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940045991.273, "dur": 56.053, + "args": { + "External id": 985457,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345940046159.207, "dur": 372.507, + "args": { + "External id": 985458,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345940046192.382, "dur": 334.442, + "args": { + "External id": 985459,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18359, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345940046202.917, "dur": 317.397, + "args": { + "External id": 985460,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345940046556.434, "dur": 2.527, + "args": { + "External id": 985461,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18361, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2338708, "tid": 2338708, + "ts": 6345940046647.448, "dur": 30070.210, + "args": { + "External id": 985462,"Record function id": 0, "Ev Idx": 18362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940046752.323, "dur": 6.615, + "args": { + "External id": 985463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940046762.737, "dur": 1.481, + "args": { + "External id": 985464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940046765.807, "dur": 3.512, + "args": { + "External id": 985465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940046771.136, "dur": 0.799, + "args": { + "External id": 985466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940046773.426, "dur": 0.997, + "args": { + "External id": 985467,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940046775.889, "dur": 0.930, + "args": { + "External id": 985468,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940046778.222, "dur": 0.817, + "args": { + "External id": 985469,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940046780.992, "dur": 2.198, + "args": { + "External id": 985470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940046784.512, "dur": 0.716, + "args": { + "External id": 985471,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940046788.755, "dur": 0.787, + "args": { + "External id": 985472,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940046808.199, "dur": 29845.642, + "args": { + "External id": 985473,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940046823.594, "dur": 29818.557, + "args": { + "External id": 985474,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940046840.962, "dur": 17.006, + "args": { + "External id": 985475,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940046861.443, "dur": 29735.270, + "args": { + "External id": 985476,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940046864.490, "dur": 29731.154, + "args": { + "External id": 985477,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940046869.709, "dur": 6.457, + "args": { + "External id": 985478,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940046877.966, "dur": 29712.866, + "args": { + "External id": 985479,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940076882.043, "dur": 38.174, + "args": { + "External id": 985480,"Sequence number": 10552518, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18380 + } + }, + { + "ph": "s", "id": 401, "pid": 2338708, "tid": 2338708, "ts": 6345940076882.043, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345940076902.827, "dur": 12.400, + "args": { + "External id": 985481,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940076908.220, "dur": 6.818, + "args": { + "External id": 985482,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345940076988.442, "dur": 123.620, + "args": { + "External id": 985483,"Record function id": 0, "Ev Idx": 18383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345940077115.493, "dur": 1148.976, + "args": { + "External id": 985484,"Record function id": 0, "Ev Idx": 18384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940077156.536, "dur": 1093.406, + "args": { + "External id": 985485,"Sequence number": 10552519, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18385 + } + }, + { + "ph": "s", "id": 400, "pid": 2338708, "tid": 2338708, "ts": 6345940077156.536, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345940077231.756, "dur": 57.388, + "args": { + "External id": 985486,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940077302.348, "dur": 105.150, + "args": { + "External id": 985487,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940077420.599, "dur": 38.185, + "args": { + "External id": 985488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940077468.654, "dur": 30.369, + "args": { + "External id": 985489,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345940077529.584, "dur": 29.552, + "args": { + "External id": 985490,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345940077578.694, "dur": 16.500, + "args": { + "External id": 985491,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345940077619.019, "dur": 134.231, + "args": { + "External id": 985492,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940077671.454, "dur": 11.989, + "args": { + "External id": 985493,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940077677.217, "dur": 5.433, + "args": { + "External id": 985494,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940077686.385, "dur": 4.586, + "args": { + "External id": 985495,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940077692.194, "dur": 0.933, + "args": { + "External id": 985496,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940077695.700, "dur": 4.955, + "args": { + "External id": 985497,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940077763.837, "dur": 47.196, + "args": { + "External id": 985498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345940077842.403, "dur": 32.329, + "args": { + "External id": 985499,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940077884.629, "dur": 40.541, + "args": { + "External id": 985500,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940077936.117, "dur": 35.603, + "args": { + "External id": 985501,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345940077993.414, "dur": 47.937, + "args": { + "External id": 985502,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940078050.814, "dur": 77.337, + "args": { + "External id": 985503,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345940078155.076, "dur": 22.217, + "args": { + "External id": 985504,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18404 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2338708, "tid": 2338708, + "ts": 6345940078335.178, "dur": 80.024, + "args": { + "External id": 985505,"Record function id": 0, "Ev Idx": 18405 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345940078492.425, "dur": 46.827, + "args": { + "External id": 985506,"Record function id": 0, "Ev Idx": 18406 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2338708, "tid": 2338708, + "ts": 6345940078547.800, "dur": 31433.748, + "args": { + "External id": 985507,"Record function id": 0, "Ev Idx": 18407 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2338708, "tid": 2338708, + "ts": 6345940078555.972, "dur": 955.325, + "args": { + "External id": 985508,"Record function id": 0, "Ev Idx": 18408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940078642.842, "dur": 8.681, + "args": { + "External id": 985509,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345940078664.428, "dur": 36.828, + "args": { + "External id": 985510,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078670.179, "dur": 2.253, + "args": { + "External id": 985511,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078676.347, "dur": 0.431, + "args": { + "External id": 985512,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078677.807, "dur": 0.545, + "args": { + "External id": 985513,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078679.735, "dur": 0.675, + "args": { + "External id": 985514,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078683.003, "dur": 0.596, + "args": { + "External id": 985515,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078685.051, "dur": 0.934, + "args": { + "External id": 985516,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078687.346, "dur": 4.233, + "args": { + "External id": 985517,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078693.259, "dur": 0.404, + "args": { + "External id": 985518,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078694.575, "dur": 0.283, + "args": { + "External id": 985519,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940078714.810, "dur": 52.134, + "args": { + "External id": 985520,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345940078798.714, "dur": 118.911, + "args": { + "External id": 985521,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940078810.129, "dur": 4.299, + "args": { + "External id": 985522,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345940078820.060, "dur": 9.974, + "args": { + "External id": 985523,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940078824.483, "dur": 5.147, + "args": { + "External id": 985524,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078827.853, "dur": 0.523, + "args": { + "External id": 985525,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345940078836.520, "dur": 30.327, + "args": { + "External id": 985526,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078838.787, "dur": 1.764, + "args": { + "External id": 985527,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078842.532, "dur": 0.658, + "args": { + "External id": 985528,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078844.047, "dur": 0.645, + "args": { + "External id": 985529,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078848.253, "dur": 2.481, + "args": { + "External id": 985530,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078851.717, "dur": 0.573, + "args": { + "External id": 985531,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078854.558, "dur": 0.349, + "args": { + "External id": 985532,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078857.370, "dur": 0.434, + "args": { + "External id": 985533,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078859.427, "dur": 0.533, + "args": { + "External id": 985534,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940078860.967, "dur": 1.547, + "args": { + "External id": 985535,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940078880.469, "dur": 29.486, + "args": { + "External id": 985536,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345940078972.662, "dur": 429.253, + "args": { + "External id": 985537,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345940079004.583, "dur": 391.469, + "args": { + "External id": 985538,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18438, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345940079036.457, "dur": 354.035, + "args": { + "External id": 985539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345940079431.227, "dur": 2.708, + "args": { + "External id": 985540,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18440, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2338708, "tid": 2338708, + "ts": 6345940079532.170, "dur": 30105.654, + "args": { + "External id": 985541,"Record function id": 0, "Ev Idx": 18441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940079642.042, "dur": 6.832, + "args": { + "External id": 985542,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940079652.489, "dur": 0.962, + "args": { + "External id": 985543,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940079655.100, "dur": 3.378, + "args": { + "External id": 985544,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940079660.005, "dur": 1.032, + "args": { + "External id": 985545,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940079662.394, "dur": 1.278, + "args": { + "External id": 985546,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940079664.876, "dur": 0.940, + "args": { + "External id": 985547,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940079667.359, "dur": 0.826, + "args": { + "External id": 985548,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940079669.859, "dur": 2.417, + "args": { + "External id": 985549,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940079673.483, "dur": 0.827, + "args": { + "External id": 985550,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940079677.903, "dur": 0.637, + "args": { + "External id": 985551,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940079697.519, "dur": 29832.573, + "args": { + "External id": 985552,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940079713.366, "dur": 29796.933, + "args": { + "External id": 985553,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940079728.229, "dur": 16.784, + "args": { + "External id": 985554,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940079748.323, "dur": 29685.483, + "args": { + "External id": 985555,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940079751.095, "dur": 29680.247, + "args": { + "External id": 985556,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940079757.156, "dur": 5.356, + "args": { + "External id": 985557,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940079764.194, "dur": 29657.228, + "args": { + "External id": 985558,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940109892.879, "dur": 50.464, + "args": { + "External id": 985559,"Sequence number": 10552520, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18459 + } + }, + { + "ph": "s", "id": 399, "pid": 2338708, "tid": 2338708, "ts": 6345940109892.879, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345940109920.600, "dur": 16.521, + "args": { + "External id": 985560,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940109928.160, "dur": 8.676, + "args": { + "External id": 985561,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345940110103.513, "dur": 152.961, + "args": { + "External id": 985562,"Record function id": 0, "Ev Idx": 18462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345940110258.941, "dur": 1522.500, + "args": { + "External id": 985563,"Record function id": 0, "Ev Idx": 18463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940110318.797, "dur": 1441.908, + "args": { + "External id": 985564,"Sequence number": 10552521, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18464 + } + }, + { + "ph": "s", "id": 398, "pid": 2338708, "tid": 2338708, "ts": 6345940110318.797, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345940110427.565, "dur": 71.700, + "args": { + "External id": 985565,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940110517.779, "dur": 131.172, + "args": { + "External id": 985566,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940110665.395, "dur": 43.413, + "args": { + "External id": 985567,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940110721.223, "dur": 34.592, + "args": { + "External id": 985568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345940110790.968, "dur": 38.176, + "args": { + "External id": 985569,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345940110861.328, "dur": 24.395, + "args": { + "External id": 985570,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345940110919.202, "dur": 256.790, + "args": { + "External id": 985571,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940110996.723, "dur": 37.637, + "args": { + "External id": 985572,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940111003.995, "dur": 28.637, + "args": { + "External id": 985573,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940111039.185, "dur": 5.833, + "args": { + "External id": 985574,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940111046.770, "dur": 1.304, + "args": { + "External id": 985575,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940111051.857, "dur": 47.651, + "args": { + "External id": 985576,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940111193.282, "dur": 67.189, + "args": { + "External id": 985577,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345940111305.567, "dur": 42.827, + "args": { + "External id": 985578,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940111361.301, "dur": 51.947, + "args": { + "External id": 985579,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940111434.038, "dur": 42.226, + "args": { + "External id": 985580,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345940111510.389, "dur": 38.358, + "args": { + "External id": 985581,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940111558.701, "dur": 46.935, + "args": { + "External id": 985582,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345940111629.741, "dur": 23.403, + "args": { + "External id": 985583,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18483 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2338708, "tid": 2338708, + "ts": 6345940111882.619, "dur": 113.433, + "args": { + "External id": 985584,"Record function id": 0, "Ev Idx": 18484 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2338708, "tid": 2338708, + "ts": 6345940112178.557, "dur": 65.430, + "args": { + "External id": 985585,"Record function id": 0, "Ev Idx": 18485 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2338708, "tid": 2338708, + "ts": 6345940112256.169, "dur": 37332.167, + "args": { + "External id": 985586,"Record function id": 0, "Ev Idx": 18486 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2338708, "tid": 2338708, + "ts": 6345940112268.962, "dur": 1255.345, + "args": { + "External id": 985587,"Record function id": 0, "Ev Idx": 18487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940112384.912, "dur": 12.374, + "args": { + "External id": 985588,"Record function id": 0, "Concrete Inputs": ["[27264000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345940112417.637, "dur": 53.821, + "args": { + "External id": 985589,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112425.295, "dur": 2.747, + "args": { + "External id": 985590,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112434.036, "dur": 0.515, + "args": { + "External id": 985591,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112436.142, "dur": 0.619, + "args": { + "External id": 985592,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112438.630, "dur": 0.612, + "args": { + "External id": 985593,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "2621952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112443.665, "dur": 0.498, + "args": { + "External id": 985594,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "3146240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112445.945, "dur": 0.798, + "args": { + "External id": 985595,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "5243392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112448.282, "dur": 5.816, + "args": { + "External id": 985596,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "5243904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112456.389, "dur": 0.460, + "args": { + "External id": 985597,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "12583936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112458.535, "dur": 0.610, + "args": { + "External id": 985598,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "19923968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940112488.334, "dur": 72.092, + "args": { + "External id": 985599,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2338708, "tid": 2338708, + "ts": 6345940112607.537, "dur": 187.469, + "args": { + "External id": 985600,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "27264000", "8", "2", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [], [], [], [], [], []], "Ev Idx": 18500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940112640.919, "dur": 7.302, + "args": { + "External id": 985601,"Record function id": 0, "Concrete Inputs": ["[218112000]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2338708, "tid": 2338708, + "ts": 6345940112656.525, "dur": 13.482, + "args": { + "External id": 985602,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "27264000"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940112662.521, "dur": 6.860, + "args": { + "External id": 985603,"Record function id": 0, "Concrete Inputs": ["", "0", "54528000", "81792000", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[218112000], [], [], [], []], "Ev Idx": 18503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112667.122, "dur": 0.636, + "args": { + "External id": 985604,"Record function id": 0, "Concrete Inputs": ["", "[27264000]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[218112000], [], [], []], "Ev Idx": 18504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2338708, "tid": 2338708, + "ts": 6345940112679.369, "dur": 44.986, + "args": { + "External id": 985605,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[27264000], [], []], "Ev Idx": 18505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112688.609, "dur": 0.538, + "args": { + "External id": 985606,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "54528000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112693.845, "dur": 0.358, + "args": { + "External id": 985607,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "54528512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112695.783, "dur": 0.585, + "args": { + "External id": 985608,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "56625664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112697.968, "dur": 5.654, + "args": { + "External id": 985609,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "57149952"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112704.803, "dur": 0.349, + "args": { + "External id": 985610,"Record function id": 0, "Concrete Inputs": ["", "[2097152]", "[1]", "57674240"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112707.904, "dur": 0.245, + "args": { + "External id": 985611,"Record function id": 0, "Concrete Inputs": ["", "[512]", "[1]", "59771392"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112712.416, "dur": 0.292, + "args": { + "External id": 985612,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "59771904"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112714.342, "dur": 0.272, + "args": { + "External id": 985613,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "67111936"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940112715.879, "dur": 0.297, + "args": { + "External id": 985614,"Record function id": 0, "Concrete Inputs": ["", "[7340032]", "[1]", "74451968"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[27264000], [], [], []], "Ev Idx": 18514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940112742.367, "dur": 42.598, + "args": { + "External id": 985615,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], [[512], [2097152], [524288], [524288], [2097152], [512], [7340032], [7340032], [7340032]], []], "Ev Idx": 18515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2338708, "tid": 2338708, + "ts": 6345940112871.017, "dur": 515.785, + "args": { + "External id": 985616,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[218112000], [27264000], [], [], []], "Ev Idx": 18516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345940112910.128, "dur": 469.386, + "args": { + "External id": 985617,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 2, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 218112000, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[27264000], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 18517, "In msg nelems": 27264000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2338708, "tid": 2338708, + "ts": 6345940112923.766, "dur": 447.573, + "args": { + "External id": 985618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[27264000]], "Ev Idx": 18518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345940113423.420, "dur": 2.932, + "args": { + "External id": 985619,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 18519, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2338708, "tid": 2338708, + "ts": 6345940113553.373, "dur": 35788.417, + "args": { + "External id": 985620,"Record function id": 0, "Ev Idx": 18520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940113682.316, "dur": 8.553, + "args": { + "External id": 985621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[218112000], []], "Ev Idx": 18521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940113695.145, "dur": 1.425, + "args": { + "External id": 985622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940113698.485, "dur": 4.704, + "args": { + "External id": 985623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940113705.246, "dur": 1.441, + "args": { + "External id": 985624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940113708.260, "dur": 1.124, + "args": { + "External id": 985625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 18525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940113710.771, "dur": 1.374, + "args": { + "External id": 985626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[16777216], []], "Ev Idx": 18526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940113713.926, "dur": 1.279, + "args": { + "External id": 985627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 18527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940113717.017, "dur": 3.861, + "args": { + "External id": 985628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940113722.704, "dur": 1.172, + "args": { + "External id": 985629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940113729.393, "dur": 1.039, + "args": { + "External id": 985630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[58720256], []], "Ev Idx": 18530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940113754.572, "dur": 35533.857, + "args": { + "External id": 985631,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940113774.811, "dur": 35504.199, + "args": { + "External id": 985632,"Record function id": 0, "Concrete Inputs": ["", "[512, 2097152, 524288, 524288, 2097152, 512, 7340032, 7340032, 7340032]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[27264000, 1], [], [], [[512, 1], [2097152, 1], [524288, 1], [524288, 1], [2097152, 1], [512, 1], [7340032, 1], [7340032, 1], [7340032, 1]]], "Input Dims": [[8, 27264000], [], [], [[8, 512], [8, 2097152], [8, 524288], [8, 524288], [8, 2097152], [8, 512], [8, 7340032], [8, 7340032], [8, 7340032]]], "Ev Idx": 18532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940113797.141, "dur": 22.250, + "args": { + "External id": 985633,"Record function id": 0, "Concrete Inputs": ["[3845]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940113825.514, "dur": 35409.864, + "args": { + "External id": 985634,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], [], []], "Ev Idx": 18534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940113828.819, "dur": 35405.755, + "args": { + "External id": 985635,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3845], [], [], [], [], [], []], "Ev Idx": 18535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940113836.461, "dur": 7.346, + "args": { + "External id": 985636,"Record function id": 0, "Concrete Inputs": ["[3845]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940113845.996, "dur": 35384.555, + "args": { + "External id": 985637,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3845], [3845], []], "Ev Idx": 18537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940149518.735, "dur": 37.017, + "args": { + "External id": 985638,"Sequence number": 10552522, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096]], "Ev Idx": 18538 + } + }, + { + "ph": "s", "id": 397, "pid": 2338708, "tid": 2338708, "ts": 6345940149518.735, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345940149539.869, "dur": 10.010, + "args": { + "External id": 985639,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1]], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096]], "Ev Idx": 18539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940149544.166, "dur": 5.395, + "args": { + "External id": 985640,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345940149639.947, "dur": 94.157, + "args": { + "External id": 985641,"Record function id": 0, "Ev Idx": 18541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2338708, "tid": 2338708, + "ts": 6345940149735.802, "dur": 1392.574, + "args": { + "External id": 985642,"Record function id": 0, "Ev Idx": 18542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940149784.666, "dur": 1325.084, + "args": { + "External id": 985643,"Sequence number": 10552523, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1], [4096, 1], [4096, 1], [4096, 1], [64, 1], [64, 1], [4096, 1], [1], [4096, 1], [4096, 1], [14336, 1]], "Input Dims": [[8, 4096, 4096], [4096], [4096, 4096], [1024, 4096], [1024, 4096], [4096, 64], [4096, 64], [4096, 4096], [4096], [14336, 4096], [14336, 4096], [4096, 14336]], "Ev Idx": 18543 + } + }, + { + "ph": "s", "id": 396, "pid": 2338708, "tid": 2338708, "ts": 6345940149784.666, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345940149866.951, "dur": 56.417, + "args": { + "External id": 985644,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940149938.551, "dur": 176.844, + "args": { + "External id": 985645,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940150134.134, "dur": 47.069, + "args": { + "External id": 985646,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940150192.964, "dur": 35.107, + "args": { + "External id": 985647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [1024, 1]], "Input Dims": [[32768, 4096], [4096, 1024], [32768, 1024]], "Ev Idx": 18547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2338708, "tid": 2338708, + "ts": 6345940150261.751, "dur": 36.773, + "args": { + "External id": 985648,"kernel_hash": "c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "32", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/4x/c4xr6tsggww762774sfwerumrgx5dmuiseaxulkxhgszyi6uokxz.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [16777216, 4096, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [8, 4096, 32, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_2", "pid": 2338708, "tid": 2338708, + "ts": 6345940150330.335, "dur": 22.494, + "args": { + "External id": 985649,"kernel_hash": "c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "8", "4096", "8", "128", "64", "4096", "32", "128", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/47/c47sets7iwljfeskjdsbtfo56xxdvaxaqf4yrkbdakof6xlq5huw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 18549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345940150383.073, "dur": 163.833, + "args": { + "External id": 985650,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940150450.849, "dur": 14.203, + "args": { + "External id": 985651,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940150457.080, "dur": 7.051, + "args": { + "External id": 985652,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940150468.231, "dur": 5.509, + "args": { + "External id": 985653,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940150475.652, "dur": 1.425, + "args": { + "External id": 985654,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940150479.817, "dur": 6.652, + "args": { + "External id": 985655,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940150559.965, "dur": 55.622, + "args": { + "External id": 985656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [4096, 1]], "Input Dims": [[32768, 4096], [4096, 4096], [32768, 4096]], "Ev Idx": 18556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_3", "pid": 2338708, "tid": 2338708, + "ts": 6345940150652.420, "dur": 36.459, + "args": { + "External id": 985657,"kernel_hash": "clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ly/clymgqs326qyhstxcqtlwadiqygkm4khi54mkcdztchgndlk5tzw.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [4096, 1], [4096, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768, 4096], [4096], [32768, 4096], [32768, 4096], [32768], [], [], [], [], [], [], [], [], []], "Ev Idx": 18557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940150701.310, "dur": 48.977, + "args": { + "External id": 985658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940150764.031, "dur": 40.953, + "args": { + "External id": 985659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096], [14336, 1]], "Input Dims": [[32768, 4096], [4096, 14336], [32768, 14336]], "Ev Idx": 18559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2338708, "tid": 2338708, + "ts": 6345940150832.657, "dur": 33.030, + "args": { + "External id": 985660,"kernel_hash": "cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2", "grid": "grid(469762048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "469762048"], "kernel_file": "/tmp/torchinductor_cvm/vj/cvjugobblojvgzrfhu5z5uctngmtt6oxwwsb6llrpdjws7vq42p2.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[14336, 1], [14336, 1], [58720256, 14336, 1], []], "Input Dims": [[32768, 14336], [32768, 14336], [8, 4096, 14336], []], "Ev Idx": 18560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940150875.311, "dur": 43.629, + "args": { + "External id": 985661,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336], [4096, 1]], "Input Dims": [[32768, 14336], [14336, 4096], [32768, 4096]], "Ev Idx": 18561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2338708, "tid": 2338708, + "ts": 6345940150943.344, "dur": 23.219, + "args": { + "External id": 985662,"kernel_hash": "cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/qq/cqqk745ynmljfbwrncydjaxstpncc7oijqrxif6lbjzkf43kjbsm.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [32768, 4096], []], "Ev Idx": 18562 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2338708, "tid": 2338708, + "ts": 6345940151213.337, "dur": 42.765, + "args": { + "External id": 985663,"Record function id": 0, "Ev Idx": 18563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940151431.681, "dur": 359.439, + "args": { + "External id": 985664,"Sequence number": 10552524, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18564 + } + }, + { + "ph": "s", "id": 395, "pid": 2338708, "tid": 2338708, "ts": 6345940151431.681, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940151477.111, "dur": 10.095, + "args": { + "External id": 985665,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940151479.582, "dur": 7.338, + "args": { + "External id": 985666,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940151499.591, "dur": 18.022, + "args": { + "External id": 985667,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940151504.746, "dur": 12.141, + "args": { + "External id": 985668,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940151529.401, "dur": 6.595, + "args": { + "External id": 985669,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940151767.536, "dur": 7.809, + "args": { + "External id": 985670,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940151772.020, "dur": 3.046, + "args": { + "External id": 985671,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940151827.164, "dur": 176.962, + "args": { + "External id": 985672,"Sequence number": 10552525, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940151830.951, "dur": 21.212, + "args": { + "External id": 985673,"Sequence number": 10552525, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18573 + } + }, + { + "ph": "s", "id": 394, "pid": 2338708, "tid": 2338708, "ts": 6345940151830.951, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940151838.508, "dur": 11.189, + "args": { + "External id": 985674,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940151846.643, "dur": 2.549, + "args": { + "External id": 985675,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940151854.924, "dur": 148.853, + "args": { + "External id": 985676,"Sequence number": 10552526, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940151858.671, "dur": 5.728, + "args": { + "External id": 985677,"Sequence number": 10552526, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940151859.965, "dur": 4.254, + "args": { + "External id": 985678,"Sequence number": 10552526, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18578 + } + }, + { + "ph": "s", "id": 393, "pid": 2338708, "tid": 2338708, "ts": 6345940151859.965, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940151869.157, "dur": 120.768, + "args": { + "External id": 985679,"Sequence number": 10552527, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18579 + } + }, + { + "ph": "s", "id": 392, "pid": 2338708, "tid": 2338708, "ts": 6345940151869.157, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940151995.204, "dur": 7.347, + "args": { + "External id": 985680,"Sequence number": 10552528, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18580 + } + }, + { + "ph": "s", "id": 391, "pid": 2338708, "tid": 2338708, "ts": 6345940151995.204, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940152039.164, "dur": 146.161, + "args": { + "External id": 985681,"Sequence number": 10552529, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940152040.397, "dur": 52.060, + "args": { + "External id": 985682,"Sequence number": 10552529, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18582 + } + }, + { + "ph": "s", "id": 390, "pid": 2338708, "tid": 2338708, "ts": 6345940152040.397, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940152042.917, "dur": 7.242, + "args": { + "External id": 985683,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940152048.188, "dur": 1.563, + "args": { + "External id": 985684,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940152094.873, "dur": 90.200, + "args": { + "External id": 985685,"Sequence number": 10552530, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940152096.999, "dur": 9.093, + "args": { + "External id": 985686,"Sequence number": 10552530, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940152098.244, "dur": 7.680, + "args": { + "External id": 985687,"Sequence number": 10552530, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18587 + } + }, + { + "ph": "s", "id": 389, "pid": 2338708, "tid": 2338708, "ts": 6345940152098.244, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940152106.868, "dur": 68.968, + "args": { + "External id": 985688,"Sequence number": 10552531, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18588 + } + }, + { + "ph": "s", "id": 388, "pid": 2338708, "tid": 2338708, "ts": 6345940152106.868, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940152178.609, "dur": 5.610, + "args": { + "External id": 985689,"Sequence number": 10552532, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18589 + } + }, + { + "ph": "s", "id": 387, "pid": 2338708, "tid": 2338708, "ts": 6345940152178.609, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940152198.042, "dur": 86.537, + "args": { + "External id": 985690,"Sequence number": 10552533, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940152198.914, "dur": 7.873, + "args": { + "External id": 985691,"Sequence number": 10552533, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18591 + } + }, + { + "ph": "s", "id": 386, "pid": 2338708, "tid": 2338708, "ts": 6345940152198.914, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940152201.626, "dur": 3.449, + "args": { + "External id": 985692,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940152203.800, "dur": 0.928, + "args": { + "External id": 985693,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940152210.409, "dur": 73.949, + "args": { + "External id": 985694,"Sequence number": 10552534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940152211.684, "dur": 8.027, + "args": { + "External id": 985695,"Sequence number": 10552534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940152213.079, "dur": 6.450, + "args": { + "External id": 985696,"Sequence number": 10552534, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18596 + } + }, + { + "ph": "s", "id": 385, "pid": 2338708, "tid": 2338708, "ts": 6345940152213.079, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940152220.447, "dur": 52.856, + "args": { + "External id": 985697,"Sequence number": 10552535, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18597 + } + }, + { + "ph": "s", "id": 384, "pid": 2338708, "tid": 2338708, "ts": 6345940152220.447, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940152275.822, "dur": 8.163, + "args": { + "External id": 985698,"Sequence number": 10552536, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18598 + } + }, + { + "ph": "s", "id": 383, "pid": 2338708, "tid": 2338708, "ts": 6345940152275.822, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940152314.016, "dur": 4.840, + "args": { + "External id": 985699,"Sequence number": 10552537, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940152315.283, "dur": 3.386, + "args": { + "External id": 985700,"Sequence number": 10552537, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18600 + } + }, + { + "ph": "s", "id": 382, "pid": 2338708, "tid": 2338708, "ts": 6345940152315.283, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940152327.900, "dur": 7.939, + "args": { + "External id": 985701,"Sequence number": 10552538, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940152329.288, "dur": 6.412, + "args": { + "External id": 985702,"Sequence number": 10552538, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18602 + } + }, + { + "ph": "s", "id": 381, "pid": 2338708, "tid": 2338708, "ts": 6345940152329.288, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940152344.580, "dur": 6.072, + "args": { + "External id": 985703,"Sequence number": 10552539, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940152345.918, "dur": 4.563, + "args": { + "External id": 985704,"Sequence number": 10552539, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18604 + } + }, + { + "ph": "s", "id": 380, "pid": 2338708, "tid": 2338708, "ts": 6345940152345.918, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940152396.285, "dur": 233.382, + "args": { + "External id": 985705,"Sequence number": 10552540, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18605 + } + }, + { + "ph": "s", "id": 379, "pid": 2338708, "tid": 2338708, "ts": 6345940152396.285, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940152426.025, "dur": 11.857, + "args": { + "External id": 985706,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940152430.020, "dur": 7.398, + "args": { + "External id": 985707,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940152645.982, "dur": 140.703, + "args": { + "External id": 985708,"Sequence number": 10552541, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18608 + } + }, + { + "ph": "s", "id": 378, "pid": 2338708, "tid": 2338708, "ts": 6345940152645.982, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940152663.355, "dur": 9.105, + "args": { + "External id": 985709,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940152666.601, "dur": 5.444, + "args": { + "External id": 985710,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338708, "tid": 2338708, + "ts": 6345940152823.743, "dur": 309.902, + "args": { + "External id": 985711,"Sequence number": 10552542, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18611 + } + }, + { + "ph": "s", "id": 377, "pid": 2338708, "tid": 2338708, "ts": 6345940152823.743, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345940152867.383, "dur": 183.451, + "args": { + "External id": 985712,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940152933.946, "dur": 11.051, + "args": { + "External id": 985713,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940152937.116, "dur": 7.316, + "args": { + "External id": 985714,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940152948.221, "dur": 4.706, + "args": { + "External id": 985715,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940152954.296, "dur": 1.442, + "args": { + "External id": 985716,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940152958.710, "dur": 4.942, + "args": { + "External id": 985717,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2338708, + "ts": 6345940153110.350, "dur": 7.867, + "args": { + "External id": 985718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940153141.590, "dur": 7.834, + "args": { + "External id": 985719,"Sequence number": 10552543, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940153143.500, "dur": 5.672, + "args": { + "External id": 985720,"Sequence number": 10552543, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18620 + } + }, + { + "ph": "s", "id": 376, "pid": 2338708, "tid": 2338708, "ts": 6345940153143.500, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940153168.209, "dur": 148.834, + "args": { + "External id": 985721,"Sequence number": 10552544, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940153172.662, "dur": 10.938, + "args": { + "External id": 985722,"Sequence number": 10552544, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18622 + } + }, + { + "ph": "s", "id": 375, "pid": 2338708, "tid": 2338708, "ts": 6345940153172.662, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940153176.505, "dur": 5.585, + "args": { + "External id": 985723,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940153179.713, "dur": 2.028, + "args": { + "External id": 985724,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940153185.293, "dur": 131.311, + "args": { + "External id": 985725,"Sequence number": 10552545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940153187.698, "dur": 7.711, + "args": { + "External id": 985726,"Sequence number": 10552545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940153191.922, "dur": 3.319, + "args": { + "External id": 985727,"Sequence number": 10552545, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18627 + } + }, + { + "ph": "s", "id": 374, "pid": 2338708, "tid": 2338708, "ts": 6345940153191.922, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940153196.635, "dur": 111.463, + "args": { + "External id": 985728,"Sequence number": 10552546, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18628 + } + }, + { + "ph": "s", "id": 373, "pid": 2338708, "tid": 2338708, "ts": 6345940153196.635, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940153311.696, "dur": 4.123, + "args": { + "External id": 985729,"Sequence number": 10552547, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18629 + } + }, + { + "ph": "s", "id": 372, "pid": 2338708, "tid": 2338708, "ts": 6345940153311.696, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940153365.652, "dur": 291.708, + "args": { + "External id": 985730,"Sequence number": 10552548, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18630 + } + }, + { + "ph": "s", "id": 371, "pid": 2338708, "tid": 2338708, "ts": 6345940153365.652, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940153399.104, "dur": 6.171, + "args": { + "External id": 985731,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940153400.212, "dur": 4.833, + "args": { + "External id": 985732,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338708, "tid": 2338708, + "ts": 6345940153412.487, "dur": 6.654, + "args": { + "External id": 985733,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940153413.947, "dur": 5.034, + "args": { + "External id": 985734,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940153415.118, "dur": 3.734, + "args": { + "External id": 985735,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940153429.775, "dur": 12.276, + "args": { + "External id": 985736,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940153435.477, "dur": 6.170, + "args": { + "External id": 985737,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940153449.868, "dur": 3.920, + "args": { + "External id": 985738,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940153458.628, "dur": 3.394, + "args": { + "External id": 985739,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940153627.095, "dur": 4.698, + "args": { + "External id": 985740,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940153628.658, "dur": 2.962, + "args": { + "External id": 985741,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940153634.575, "dur": 6.158, + "args": { + "External id": 985742,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940153639.085, "dur": 1.527, + "args": { + "External id": 985743,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940153683.432, "dur": 113.891, + "args": { + "External id": 985744,"Sequence number": 10552549, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940153684.729, "dur": 9.360, + "args": { + "External id": 985745,"Sequence number": 10552549, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18645 + } + }, + { + "ph": "s", "id": 370, "pid": 2338708, "tid": 2338708, "ts": 6345940153684.729, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940153687.492, "dur": 4.750, + "args": { + "External id": 985746,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940153689.835, "dur": 1.867, + "args": { + "External id": 985747,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940153695.162, "dur": 101.904, + "args": { + "External id": 985748,"Sequence number": 10552550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940153699.551, "dur": 4.021, + "args": { + "External id": 985749,"Sequence number": 10552550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940153700.640, "dur": 2.797, + "args": { + "External id": 985750,"Sequence number": 10552550, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18650 + } + }, + { + "ph": "s", "id": 369, "pid": 2338708, "tid": 2338708, "ts": 6345940153700.640, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940153704.385, "dur": 83.747, + "args": { + "External id": 985751,"Sequence number": 10552551, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18651 + } + }, + { + "ph": "s", "id": 368, "pid": 2338708, "tid": 2338708, "ts": 6345940153704.385, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940153791.055, "dur": 5.337, + "args": { + "External id": 985752,"Sequence number": 10552552, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18652 + } + }, + { + "ph": "s", "id": 367, "pid": 2338708, "tid": 2338708, "ts": 6345940153791.055, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940153810.120, "dur": 82.952, + "args": { + "External id": 985753,"Sequence number": 10552553, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940153810.768, "dur": 8.188, + "args": { + "External id": 985754,"Sequence number": 10552553, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18654 + } + }, + { + "ph": "s", "id": 366, "pid": 2338708, "tid": 2338708, "ts": 6345940153810.768, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940153813.132, "dur": 3.797, + "args": { + "External id": 985755,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940153815.749, "dur": 0.951, + "args": { + "External id": 985756,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940153819.823, "dur": 72.980, + "args": { + "External id": 985757,"Sequence number": 10552554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940153823.508, "dur": 7.443, + "args": { + "External id": 985758,"Sequence number": 10552554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940153824.864, "dur": 5.925, + "args": { + "External id": 985759,"Sequence number": 10552554, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18659 + } + }, + { + "ph": "s", "id": 365, "pid": 2338708, "tid": 2338708, "ts": 6345940153824.864, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940153831.776, "dur": 56.290, + "args": { + "External id": 985760,"Sequence number": 10552555, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18660 + } + }, + { + "ph": "s", "id": 364, "pid": 2338708, "tid": 2338708, "ts": 6345940153831.776, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940153890.564, "dur": 1.812, + "args": { + "External id": 985761,"Sequence number": 10552556, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18661 + } + }, + { + "ph": "s", "id": 363, "pid": 2338708, "tid": 2338708, "ts": 6345940153890.564, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940153923.223, "dur": 287.613, + "args": { + "External id": 985762,"Sequence number": 10552557, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18662 + } + }, + { + "ph": "s", "id": 362, "pid": 2338708, "tid": 2338708, "ts": 6345940153923.223, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940153979.943, "dur": 5.314, + "args": { + "External id": 985763,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940154093.317, "dur": 95.420, + "args": { + "External id": 985764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940154095.688, "dur": 12.225, + "args": { + "External id": 985765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940154100.821, "dur": 5.417, + "args": { + "External id": 985766,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940154104.127, "dur": 1.527, + "args": { + "External id": 985767,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940154109.185, "dur": 79.060, + "args": { + "External id": 985768,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940154114.211, "dur": 3.478, + "args": { + "External id": 985769,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154115.684, "dur": 1.876, + "args": { + "External id": 985770,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940154118.358, "dur": 64.487, + "args": { + "External id": 985771,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154185.484, "dur": 1.725, + "args": { + "External id": 985772,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345940154225.151, "dur": 39.447, + "args": { + "External id": 985773,"Sequence number": 10552558, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18673 + } + }, + { + "ph": "s", "id": 361, "pid": 2338708, "tid": 2338708, "ts": 6345940154225.151, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940154316.440, "dur": 231.804, + "args": { + "External id": 985774,"Sequence number": 10552559, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18674 + } + }, + { + "ph": "s", "id": 360, "pid": 2338708, "tid": 2338708, "ts": 6345940154316.440, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940154344.920, "dur": 4.428, + "args": { + "External id": 985775,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154346.230, "dur": 2.981, + "args": { + "External id": 985776,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940154359.773, "dur": 8.232, + "args": { + "External id": 985777,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940154362.822, "dur": 4.702, + "args": { + "External id": 985778,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940154375.762, "dur": 4.222, + "args": { + "External id": 985779,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940154530.454, "dur": 3.554, + "args": { + "External id": 985780,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154531.631, "dur": 2.141, + "args": { + "External id": 985781,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940154573.235, "dur": 109.721, + "args": { + "External id": 985782,"Sequence number": 10552560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940154577.428, "dur": 9.407, + "args": { + "External id": 985783,"Sequence number": 10552560, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18683 + } + }, + { + "ph": "s", "id": 359, "pid": 2338708, "tid": 2338708, "ts": 6345940154577.428, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940154581.000, "dur": 3.712, + "args": { + "External id": 985784,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940154582.963, "dur": 1.495, + "args": { + "External id": 985785,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940154588.006, "dur": 94.633, + "args": { + "External id": 985786,"Sequence number": 10552561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940154589.718, "dur": 11.302, + "args": { + "External id": 985787,"Sequence number": 10552561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154593.846, "dur": 7.023, + "args": { + "External id": 985788,"Sequence number": 10552561, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18688 + } + }, + { + "ph": "s", "id": 358, "pid": 2338708, "tid": 2338708, "ts": 6345940154593.846, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940154602.265, "dur": 69.662, + "args": { + "External id": 985789,"Sequence number": 10552562, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18689 + } + }, + { + "ph": "s", "id": 357, "pid": 2338708, "tid": 2338708, "ts": 6345940154602.265, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154674.835, "dur": 6.980, + "args": { + "External id": 985790,"Sequence number": 10552563, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18690 + } + }, + { + "ph": "s", "id": 356, "pid": 2338708, "tid": 2338708, "ts": 6345940154674.835, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940154692.634, "dur": 81.188, + "args": { + "External id": 985791,"Sequence number": 10552564, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940154693.522, "dur": 12.226, + "args": { + "External id": 985792,"Sequence number": 10552564, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18692 + } + }, + { + "ph": "s", "id": 355, "pid": 2338708, "tid": 2338708, "ts": 6345940154693.522, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940154701.626, "dur": 2.558, + "args": { + "External id": 985793,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940154703.154, "dur": 0.782, + "args": { + "External id": 985794,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940154706.552, "dur": 66.996, + "args": { + "External id": 985795,"Sequence number": 10552565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940154707.999, "dur": 7.601, + "args": { + "External id": 985796,"Sequence number": 10552565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154712.285, "dur": 3.165, + "args": { + "External id": 985797,"Sequence number": 10552565, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18697 + } + }, + { + "ph": "s", "id": 354, "pid": 2338708, "tid": 2338708, "ts": 6345940154712.285, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940154716.545, "dur": 49.065, + "args": { + "External id": 985798,"Sequence number": 10552566, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18698 + } + }, + { + "ph": "s", "id": 353, "pid": 2338708, "tid": 2338708, "ts": 6345940154716.545, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154767.840, "dur": 4.919, + "args": { + "External id": 985799,"Sequence number": 10552567, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18699 + } + }, + { + "ph": "s", "id": 352, "pid": 2338708, "tid": 2338708, "ts": 6345940154767.840, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940154782.507, "dur": 73.971, + "args": { + "External id": 985800,"Sequence number": 10552568, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940154783.332, "dur": 8.892, + "args": { + "External id": 985801,"Sequence number": 10552568, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18701 + } + }, + { + "ph": "s", "id": 351, "pid": 2338708, "tid": 2338708, "ts": 6345940154783.332, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940154785.126, "dur": 5.365, + "args": { + "External id": 985802,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940154789.621, "dur": 0.716, + "args": { + "External id": 985803,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940154793.039, "dur": 63.207, + "args": { + "External id": 985804,"Sequence number": 10552569, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940154793.999, "dur": 11.708, + "args": { + "External id": 985805,"Sequence number": 10552569, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154795.318, "dur": 10.171, + "args": { + "External id": 985806,"Sequence number": 10552569, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18706 + } + }, + { + "ph": "s", "id": 350, "pid": 2338708, "tid": 2338708, "ts": 6345940154795.318, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940154809.280, "dur": 40.818, + "args": { + "External id": 985807,"Sequence number": 10552570, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18707 + } + }, + { + "ph": "s", "id": 349, "pid": 2338708, "tid": 2338708, "ts": 6345940154809.280, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154852.790, "dur": 3.051, + "args": { + "External id": 985808,"Sequence number": 10552571, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18708 + } + }, + { + "ph": "s", "id": 348, "pid": 2338708, "tid": 2338708, "ts": 6345940154852.790, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940154875.514, "dur": 7.385, + "args": { + "External id": 985809,"Sequence number": 10552572, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154876.605, "dur": 6.078, + "args": { + "External id": 985810,"Sequence number": 10552572, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18710 + } + }, + { + "ph": "s", "id": 347, "pid": 2338708, "tid": 2338708, "ts": 6345940154876.605, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940154890.708, "dur": 13.354, + "args": { + "External id": 985811,"Sequence number": 10552573, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154901.210, "dur": 2.721, + "args": { + "External id": 985812,"Sequence number": 10552573, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18712 + } + }, + { + "ph": "s", "id": 346, "pid": 2338708, "tid": 2338708, "ts": 6345940154901.210, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940154909.226, "dur": 4.191, + "args": { + "External id": 985813,"Sequence number": 10552574, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940154910.372, "dur": 2.924, + "args": { + "External id": 985814,"Sequence number": 10552574, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18714 + } + }, + { + "ph": "s", "id": 345, "pid": 2338708, "tid": 2338708, "ts": 6345940154910.372, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940154947.435, "dur": 271.098, + "args": { + "External id": 985815,"Sequence number": 10552575, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18715 + } + }, + { + "ph": "s", "id": 344, "pid": 2338708, "tid": 2338708, "ts": 6345940154947.435, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940154971.133, "dur": 15.000, + "args": { + "External id": 985816,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940154979.092, "dur": 6.500, + "args": { + "External id": 985817,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940155241.172, "dur": 144.322, + "args": { + "External id": 985818,"Sequence number": 10552576, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18718 + } + }, + { + "ph": "s", "id": 343, "pid": 2338708, "tid": 2338708, "ts": 6345940155241.172, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940155260.436, "dur": 10.674, + "args": { + "External id": 985819,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940155263.839, "dur": 6.745, + "args": { + "External id": 985820,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338708, "tid": 2338708, + "ts": 6345940155420.927, "dur": 222.576, + "args": { + "External id": 985821,"Sequence number": 10552577, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18721 + } + }, + { + "ph": "s", "id": 342, "pid": 2338708, "tid": 2338708, "ts": 6345940155420.927, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345940155460.361, "dur": 152.141, + "args": { + "External id": 985822,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940155519.657, "dur": 11.456, + "args": { + "External id": 985823,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940155525.410, "dur": 5.147, + "args": { + "External id": 985824,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940155534.283, "dur": 4.612, + "args": { + "External id": 985825,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940155540.685, "dur": 1.121, + "args": { + "External id": 985826,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940155544.559, "dur": 4.720, + "args": { + "External id": 985827,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2338708, + "ts": 6345940155626.915, "dur": 5.836, + "args": { + "External id": 985828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940155649.641, "dur": 10.063, + "args": { + "External id": 985829,"Sequence number": 10552578, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940155651.659, "dur": 7.871, + "args": { + "External id": 985830,"Sequence number": 10552578, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18730 + } + }, + { + "ph": "s", "id": 341, "pid": 2338708, "tid": 2338708, "ts": 6345940155651.659, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940155675.389, "dur": 143.329, + "args": { + "External id": 985831,"Sequence number": 10552579, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940155677.259, "dur": 10.905, + "args": { + "External id": 985832,"Sequence number": 10552579, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18732 + } + }, + { + "ph": "s", "id": 340, "pid": 2338708, "tid": 2338708, "ts": 6345940155677.259, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940155680.996, "dur": 5.502, + "args": { + "External id": 985833,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940155684.158, "dur": 1.961, + "args": { + "External id": 985834,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940155689.682, "dur": 128.512, + "args": { + "External id": 985835,"Sequence number": 10552580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940155695.122, "dur": 4.200, + "args": { + "External id": 985836,"Sequence number": 10552580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940155695.799, "dur": 3.350, + "args": { + "External id": 985837,"Sequence number": 10552580, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18737 + } + }, + { + "ph": "s", "id": 339, "pid": 2338708, "tid": 2338708, "ts": 6345940155695.799, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940155700.587, "dur": 105.575, + "args": { + "External id": 985838,"Sequence number": 10552581, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18738 + } + }, + { + "ph": "s", "id": 338, "pid": 2338708, "tid": 2338708, "ts": 6345940155700.587, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940155809.275, "dur": 8.072, + "args": { + "External id": 985839,"Sequence number": 10552582, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18739 + } + }, + { + "ph": "s", "id": 337, "pid": 2338708, "tid": 2338708, "ts": 6345940155809.275, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940155863.046, "dur": 334.122, + "args": { + "External id": 985840,"Sequence number": 10552583, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18740 + } + }, + { + "ph": "s", "id": 336, "pid": 2338708, "tid": 2338708, "ts": 6345940155863.046, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940155883.900, "dur": 5.350, + "args": { + "External id": 985841,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940155887.281, "dur": 1.734, + "args": { + "External id": 985842,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338708, "tid": 2338708, + "ts": 6345940155894.231, "dur": 6.627, + "args": { + "External id": 985843,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940155895.415, "dur": 5.298, + "args": { + "External id": 985844,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940155899.329, "dur": 1.088, + "args": { + "External id": 985845,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940155910.457, "dur": 7.936, + "args": { + "External id": 985846,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940155912.953, "dur": 5.115, + "args": { + "External id": 985847,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940155925.317, "dur": 3.835, + "args": { + "External id": 985848,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940155933.939, "dur": 5.830, + "args": { + "External id": 985849,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940156165.208, "dur": 6.200, + "args": { + "External id": 985850,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940156166.561, "dur": 4.398, + "args": { + "External id": 985851,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940156174.884, "dur": 2.933, + "args": { + "External id": 985852,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940156176.294, "dur": 1.392, + "args": { + "External id": 985853,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940156222.197, "dur": 134.755, + "args": { + "External id": 985854,"Sequence number": 10552584, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940156223.821, "dur": 13.092, + "args": { + "External id": 985855,"Sequence number": 10552584, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18755 + } + }, + { + "ph": "s", "id": 335, "pid": 2338708, "tid": 2338708, "ts": 6345940156223.821, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940156227.443, "dur": 8.004, + "args": { + "External id": 985856,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940156233.268, "dur": 1.820, + "args": { + "External id": 985857,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940156238.506, "dur": 118.078, + "args": { + "External id": 985858,"Sequence number": 10552585, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940156240.518, "dur": 7.353, + "args": { + "External id": 985859,"Sequence number": 10552585, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940156241.497, "dur": 6.217, + "args": { + "External id": 985860,"Sequence number": 10552585, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18760 + } + }, + { + "ph": "s", "id": 334, "pid": 2338708, "tid": 2338708, "ts": 6345940156241.497, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940156248.888, "dur": 100.016, + "args": { + "External id": 985861,"Sequence number": 10552586, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18761 + } + }, + { + "ph": "s", "id": 333, "pid": 2338708, "tid": 2338708, "ts": 6345940156248.888, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940156352.125, "dur": 3.641, + "args": { + "External id": 985862,"Sequence number": 10552587, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18762 + } + }, + { + "ph": "s", "id": 332, "pid": 2338708, "tid": 2338708, "ts": 6345940156352.125, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940156368.184, "dur": 85.284, + "args": { + "External id": 985863,"Sequence number": 10552588, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940156369.168, "dur": 7.113, + "args": { + "External id": 985864,"Sequence number": 10552588, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18764 + } + }, + { + "ph": "s", "id": 331, "pid": 2338708, "tid": 2338708, "ts": 6345940156369.168, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940156371.517, "dur": 2.940, + "args": { + "External id": 985865,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940156373.350, "dur": 0.944, + "args": { + "External id": 985866,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940156379.667, "dur": 73.508, + "args": { + "External id": 985867,"Sequence number": 10552589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940156380.994, "dur": 10.692, + "args": { + "External id": 985868,"Sequence number": 10552589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940156381.824, "dur": 9.655, + "args": { + "External id": 985869,"Sequence number": 10552589, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18769 + } + }, + { + "ph": "s", "id": 330, "pid": 2338708, "tid": 2338708, "ts": 6345940156381.824, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940156392.432, "dur": 53.414, + "args": { + "External id": 985870,"Sequence number": 10552590, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18770 + } + }, + { + "ph": "s", "id": 329, "pid": 2338708, "tid": 2338708, "ts": 6345940156392.432, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940156447.990, "dur": 4.687, + "args": { + "External id": 985871,"Sequence number": 10552591, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18771 + } + }, + { + "ph": "s", "id": 328, "pid": 2338708, "tid": 2338708, "ts": 6345940156447.990, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940156480.630, "dur": 185.778, + "args": { + "External id": 985872,"Sequence number": 10552592, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18772 + } + }, + { + "ph": "s", "id": 327, "pid": 2338708, "tid": 2338708, "ts": 6345940156480.630, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940156529.155, "dur": 5.835, + "args": { + "External id": 985873,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940156579.701, "dur": 70.351, + "args": { + "External id": 985874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940156580.763, "dur": 8.503, + "args": { + "External id": 985875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940156582.262, "dur": 5.743, + "args": { + "External id": 985876,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940156586.431, "dur": 1.318, + "args": { + "External id": 985877,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940156590.206, "dur": 59.419, + "args": { + "External id": 985878,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940156591.808, "dur": 3.236, + "args": { + "External id": 985879,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940156593.383, "dur": 1.495, + "args": { + "External id": 985880,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940156595.879, "dur": 49.536, + "args": { + "External id": 985881,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940156647.786, "dur": 1.067, + "args": { + "External id": 985882,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345940156677.079, "dur": 31.347, + "args": { + "External id": 985883,"Sequence number": 10552593, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18783 + } + }, + { + "ph": "s", "id": 326, "pid": 2338708, "tid": 2338708, "ts": 6345940156677.079, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940156754.956, "dur": 220.186, + "args": { + "External id": 985884,"Sequence number": 10552594, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18784 + } + }, + { + "ph": "s", "id": 325, "pid": 2338708, "tid": 2338708, "ts": 6345940156754.956, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940156776.942, "dur": 3.859, + "args": { + "External id": 985885,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940156778.041, "dur": 2.584, + "args": { + "External id": 985886,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940156790.516, "dur": 8.336, + "args": { + "External id": 985887,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940156793.620, "dur": 4.806, + "args": { + "External id": 985888,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940156806.580, "dur": 7.505, + "args": { + "External id": 985889,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940156958.486, "dur": 3.583, + "args": { + "External id": 985890,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940156959.881, "dur": 2.009, + "args": { + "External id": 985891,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940156998.519, "dur": 190.146, + "args": { + "External id": 985892,"Sequence number": 10552595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940156999.612, "dur": 29.327, + "args": { + "External id": 985893,"Sequence number": 10552595, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18793 + } + }, + { + "ph": "s", "id": 324, "pid": 2338708, "tid": 2338708, "ts": 6345940156999.612, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940157002.311, "dur": 3.964, + "args": { + "External id": 985894,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940157004.771, "dur": 1.291, + "args": { + "External id": 985895,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940157031.233, "dur": 157.088, + "args": { + "External id": 985896,"Sequence number": 10552596, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940157036.262, "dur": 6.137, + "args": { + "External id": 985897,"Sequence number": 10552596, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940157037.401, "dur": 4.780, + "args": { + "External id": 985898,"Sequence number": 10552596, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18798 + } + }, + { + "ph": "s", "id": 323, "pid": 2338708, "tid": 2338708, "ts": 6345940157037.401, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940157043.219, "dur": 133.307, + "args": { + "External id": 985899,"Sequence number": 10552597, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18799 + } + }, + { + "ph": "s", "id": 322, "pid": 2338708, "tid": 2338708, "ts": 6345940157043.219, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940157181.086, "dur": 6.287, + "args": { + "External id": 985900,"Sequence number": 10552598, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18800 + } + }, + { + "ph": "s", "id": 321, "pid": 2338708, "tid": 2338708, "ts": 6345940157181.086, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940157208.427, "dur": 93.685, + "args": { + "External id": 985901,"Sequence number": 10552599, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940157209.788, "dur": 7.778, + "args": { + "External id": 985902,"Sequence number": 10552599, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18802 + } + }, + { + "ph": "s", "id": 320, "pid": 2338708, "tid": 2338708, "ts": 6345940157209.788, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940157212.447, "dur": 3.240, + "args": { + "External id": 985903,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940157214.146, "dur": 1.343, + "args": { + "External id": 985904,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940157218.482, "dur": 83.368, + "args": { + "External id": 985905,"Sequence number": 10552600, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940157223.126, "dur": 7.090, + "args": { + "External id": 985906,"Sequence number": 10552600, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940157224.219, "dur": 5.787, + "args": { + "External id": 985907,"Sequence number": 10552600, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18807 + } + }, + { + "ph": "s", "id": 319, "pid": 2338708, "tid": 2338708, "ts": 6345940157224.219, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940157231.066, "dur": 65.171, + "args": { + "External id": 985908,"Sequence number": 10552601, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18808 + } + }, + { + "ph": "s", "id": 318, "pid": 2338708, "tid": 2338708, "ts": 6345940157231.066, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940157298.596, "dur": 2.813, + "args": { + "External id": 985909,"Sequence number": 10552602, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18809 + } + }, + { + "ph": "s", "id": 317, "pid": 2338708, "tid": 2338708, "ts": 6345940157298.596, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940157311.314, "dur": 77.573, + "args": { + "External id": 985910,"Sequence number": 10552603, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940157312.158, "dur": 11.729, + "args": { + "External id": 985911,"Sequence number": 10552603, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18811 + } + }, + { + "ph": "s", "id": 316, "pid": 2338708, "tid": 2338708, "ts": 6345940157312.158, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940157316.703, "dur": 5.443, + "args": { + "External id": 985912,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940157321.199, "dur": 0.751, + "args": { + "External id": 985913,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940157324.766, "dur": 63.816, + "args": { + "External id": 985914,"Sequence number": 10552604, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940157325.853, "dur": 9.812, + "args": { + "External id": 985915,"Sequence number": 10552604, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940157329.190, "dur": 6.189, + "args": { + "External id": 985916,"Sequence number": 10552604, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18816 + } + }, + { + "ph": "s", "id": 315, "pid": 2338708, "tid": 2338708, "ts": 6345940157329.190, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940157336.416, "dur": 44.851, + "args": { + "External id": 985917,"Sequence number": 10552605, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18817 + } + }, + { + "ph": "s", "id": 314, "pid": 2338708, "tid": 2338708, "ts": 6345940157336.416, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940157383.387, "dur": 4.774, + "args": { + "External id": 985918,"Sequence number": 10552606, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18818 + } + }, + { + "ph": "s", "id": 313, "pid": 2338708, "tid": 2338708, "ts": 6345940157383.387, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940157409.205, "dur": 7.072, + "args": { + "External id": 985919,"Sequence number": 10552607, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940157410.387, "dur": 5.723, + "args": { + "External id": 985920,"Sequence number": 10552607, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18820 + } + }, + { + "ph": "s", "id": 312, "pid": 2338708, "tid": 2338708, "ts": 6345940157410.387, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940157425.029, "dur": 4.760, + "args": { + "External id": 985921,"Sequence number": 10552608, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940157426.574, "dur": 3.063, + "args": { + "External id": 985922,"Sequence number": 10552608, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18822 + } + }, + { + "ph": "s", "id": 311, "pid": 2338708, "tid": 2338708, "ts": 6345940157426.574, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940157434.767, "dur": 6.507, + "args": { + "External id": 985923,"Sequence number": 10552609, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940157436.313, "dur": 4.816, + "args": { + "External id": 985924,"Sequence number": 10552609, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18824 + } + }, + { + "ph": "s", "id": 310, "pid": 2338708, "tid": 2338708, "ts": 6345940157436.313, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940157478.402, "dur": 185.446, + "args": { + "External id": 985925,"Sequence number": 10552610, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18825 + } + }, + { + "ph": "s", "id": 309, "pid": 2338708, "tid": 2338708, "ts": 6345940157478.402, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940157502.187, "dur": 10.534, + "args": { + "External id": 985926,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940157505.693, "dur": 6.254, + "args": { + "External id": 985927,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940157678.688, "dur": 122.318, + "args": { + "External id": 985928,"Sequence number": 10552611, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18828 + } + }, + { + "ph": "s", "id": 308, "pid": 2338708, "tid": 2338708, "ts": 6345940157678.688, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940157693.900, "dur": 8.363, + "args": { + "External id": 985929,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940157696.529, "dur": 5.299, + "args": { + "External id": 985930,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338708, "tid": 2338708, + "ts": 6345940157835.651, "dur": 281.791, + "args": { + "External id": 985931,"Sequence number": 10552612, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18831 + } + }, + { + "ph": "s", "id": 307, "pid": 2338708, "tid": 2338708, "ts": 6345940157835.651, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345940157867.023, "dur": 173.640, + "args": { + "External id": 985932,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940157925.464, "dur": 7.852, + "args": { + "External id": 985933,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940157928.704, "dur": 4.130, + "args": { + "External id": 985934,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940157936.769, "dur": 6.928, + "args": { + "External id": 985935,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940157945.461, "dur": 1.150, + "args": { + "External id": 985936,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940157949.576, "dur": 6.209, + "args": { + "External id": 985937,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2338708, + "ts": 6345940158095.390, "dur": 6.419, + "args": { + "External id": 985938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940158125.710, "dur": 7.562, + "args": { + "External id": 985939,"Sequence number": 10552613, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940158127.344, "dur": 5.726, + "args": { + "External id": 985940,"Sequence number": 10552613, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18840 + } + }, + { + "ph": "s", "id": 306, "pid": 2338708, "tid": 2338708, "ts": 6345940158127.344, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940158150.713, "dur": 147.021, + "args": { + "External id": 985941,"Sequence number": 10552614, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940158152.720, "dur": 15.995, + "args": { + "External id": 985942,"Sequence number": 10552614, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18842 + } + }, + { + "ph": "s", "id": 305, "pid": 2338708, "tid": 2338708, "ts": 6345940158152.720, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940158159.494, "dur": 7.807, + "args": { + "External id": 985943,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940158164.875, "dur": 2.074, + "args": { + "External id": 985944,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940158170.193, "dur": 127.154, + "args": { + "External id": 985945,"Sequence number": 10552615, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940158172.318, "dur": 4.867, + "args": { + "External id": 985946,"Sequence number": 10552615, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940158173.518, "dur": 3.518, + "args": { + "External id": 985947,"Sequence number": 10552615, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18847 + } + }, + { + "ph": "s", "id": 304, "pid": 2338708, "tid": 2338708, "ts": 6345940158173.518, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940158178.310, "dur": 109.952, + "args": { + "External id": 985948,"Sequence number": 10552616, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18848 + } + }, + { + "ph": "s", "id": 303, "pid": 2338708, "tid": 2338708, "ts": 6345940158178.310, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940158291.556, "dur": 4.938, + "args": { + "External id": 985949,"Sequence number": 10552617, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18849 + } + }, + { + "ph": "s", "id": 302, "pid": 2338708, "tid": 2338708, "ts": 6345940158291.556, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940158343.026, "dur": 274.565, + "args": { + "External id": 985950,"Sequence number": 10552618, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18850 + } + }, + { + "ph": "s", "id": 301, "pid": 2338708, "tid": 2338708, "ts": 6345940158343.026, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940158368.342, "dur": 3.236, + "args": { + "External id": 985951,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940158369.468, "dur": 1.972, + "args": { + "External id": 985952,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338708, "tid": 2338708, + "ts": 6345940158376.777, "dur": 9.521, + "args": { + "External id": 985953,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940158383.877, "dur": 2.257, + "args": { + "External id": 985954,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940158384.692, "dur": 1.258, + "args": { + "External id": 985955,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940158396.834, "dur": 10.283, + "args": { + "External id": 985956,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940158400.086, "dur": 6.562, + "args": { + "External id": 985957,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940158414.700, "dur": 5.847, + "args": { + "External id": 985958,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940158427.415, "dur": 5.930, + "args": { + "External id": 985959,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940158593.741, "dur": 4.403, + "args": { + "External id": 985960,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940158594.971, "dur": 2.737, + "args": { + "External id": 985961,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940158601.116, "dur": 2.920, + "args": { + "External id": 985962,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940158602.350, "dur": 1.577, + "args": { + "External id": 985963,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940158640.170, "dur": 115.168, + "args": { + "External id": 985964,"Sequence number": 10552619, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940158641.375, "dur": 14.918, + "args": { + "External id": 985965,"Sequence number": 10552619, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18865 + } + }, + { + "ph": "s", "id": 300, "pid": 2338708, "tid": 2338708, "ts": 6345940158641.375, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940158647.935, "dur": 6.657, + "args": { + "External id": 985966,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940158650.422, "dur": 3.794, + "args": { + "External id": 985967,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940158657.579, "dur": 97.408, + "args": { + "External id": 985968,"Sequence number": 10552620, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940158659.331, "dur": 5.740, + "args": { + "External id": 985969,"Sequence number": 10552620, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940158660.424, "dur": 4.489, + "args": { + "External id": 985970,"Sequence number": 10552620, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18870 + } + }, + { + "ph": "s", "id": 299, "pid": 2338708, "tid": 2338708, "ts": 6345940158660.424, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940158665.985, "dur": 80.050, + "args": { + "External id": 985971,"Sequence number": 10552621, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18871 + } + }, + { + "ph": "s", "id": 298, "pid": 2338708, "tid": 2338708, "ts": 6345940158665.985, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940158748.569, "dur": 5.695, + "args": { + "External id": 985972,"Sequence number": 10552622, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18872 + } + }, + { + "ph": "s", "id": 297, "pid": 2338708, "tid": 2338708, "ts": 6345940158748.569, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940158765.949, "dur": 84.896, + "args": { + "External id": 985973,"Sequence number": 10552623, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940158766.533, "dur": 9.462, + "args": { + "External id": 985974,"Sequence number": 10552623, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18874 + } + }, + { + "ph": "s", "id": 296, "pid": 2338708, "tid": 2338708, "ts": 6345940158766.533, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940158768.531, "dur": 5.778, + "args": { + "External id": 985975,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940158773.264, "dur": 0.867, + "args": { + "External id": 985976,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940158776.734, "dur": 73.843, + "args": { + "External id": 985977,"Sequence number": 10552624, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940158777.987, "dur": 7.361, + "args": { + "External id": 985978,"Sequence number": 10552624, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940158778.887, "dur": 6.297, + "args": { + "External id": 985979,"Sequence number": 10552624, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18879 + } + }, + { + "ph": "s", "id": 295, "pid": 2338708, "tid": 2338708, "ts": 6345940158778.887, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940158788.432, "dur": 53.328, + "args": { + "External id": 985980,"Sequence number": 10552625, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18880 + } + }, + { + "ph": "s", "id": 294, "pid": 2338708, "tid": 2338708, "ts": 6345940158788.432, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940158844.232, "dur": 5.419, + "args": { + "External id": 985981,"Sequence number": 10552626, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18881 + } + }, + { + "ph": "s", "id": 293, "pid": 2338708, "tid": 2338708, "ts": 6345940158844.232, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940158876.621, "dur": 250.225, + "args": { + "External id": 985982,"Sequence number": 10552627, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18882 + } + }, + { + "ph": "s", "id": 292, "pid": 2338708, "tid": 2338708, "ts": 6345940158876.621, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940158922.807, "dur": 5.851, + "args": { + "External id": 985983,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940158967.324, "dur": 141.702, + "args": { + "External id": 985984,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940158968.505, "dur": 8.321, + "args": { + "External id": 985985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940158969.862, "dur": 5.759, + "args": { + "External id": 985986,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940158972.232, "dur": 2.972, + "args": { + "External id": 985987,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940158977.802, "dur": 130.668, + "args": { + "External id": 985988,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940158979.625, "dur": 2.654, + "args": { + "External id": 985989,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940158980.685, "dur": 1.472, + "args": { + "External id": 985990,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940158986.256, "dur": 114.296, + "args": { + "External id": 985991,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 18891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940159105.307, "dur": 2.165, + "args": { + "External id": 985992,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345940159139.492, "dur": 31.077, + "args": { + "External id": 985993,"Sequence number": 10552628, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 18893 + } + }, + { + "ph": "s", "id": 291, "pid": 2338708, "tid": 2338708, "ts": 6345940159139.492, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940159218.214, "dur": 232.258, + "args": { + "External id": 985994,"Sequence number": 10552629, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [], [], [], [], []], "Ev Idx": 18894 + } + }, + { + "ph": "s", "id": 290, "pid": 2338708, "tid": 2338708, "ts": 6345940159218.214, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940159242.767, "dur": 6.752, + "args": { + "External id": 985995,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940159244.514, "dur": 4.784, + "args": { + "External id": 985996,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940159259.750, "dur": 11.070, + "args": { + "External id": 985997,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940159265.457, "dur": 4.896, + "args": { + "External id": 985998,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940159278.741, "dur": 6.709, + "args": { + "External id": 985999,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940159434.878, "dur": 3.922, + "args": { + "External id": 986000,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940159436.359, "dur": 2.046, + "args": { + "External id": 986001,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940159474.462, "dur": 113.377, + "args": { + "External id": 986002,"Sequence number": 10552630, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940159476.040, "dur": 12.071, + "args": { + "External id": 986003,"Sequence number": 10552630, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18903 + } + }, + { + "ph": "s", "id": 289, "pid": 2338708, "tid": 2338708, "ts": 6345940159476.040, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940159478.874, "dur": 7.639, + "args": { + "External id": 986004,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940159484.704, "dur": 1.555, + "args": { + "External id": 986005,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940159489.364, "dur": 98.164, + "args": { + "External id": 986006,"Sequence number": 10552631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940159491.626, "dur": 7.808, + "args": { + "External id": 986007,"Sequence number": 10552631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940159492.637, "dur": 6.598, + "args": { + "External id": 986008,"Sequence number": 10552631, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18908 + } + }, + { + "ph": "s", "id": 288, "pid": 2338708, "tid": 2338708, "ts": 6345940159492.637, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940159500.360, "dur": 80.552, + "args": { + "External id": 986009,"Sequence number": 10552632, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18909 + } + }, + { + "ph": "s", "id": 287, "pid": 2338708, "tid": 2338708, "ts": 6345940159500.360, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940159583.285, "dur": 3.507, + "args": { + "External id": 986010,"Sequence number": 10552633, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18910 + } + }, + { + "ph": "s", "id": 286, "pid": 2338708, "tid": 2338708, "ts": 6345940159583.285, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940159597.639, "dur": 76.928, + "args": { + "External id": 986011,"Sequence number": 10552634, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940159598.567, "dur": 8.806, + "args": { + "External id": 986012,"Sequence number": 10552634, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18912 + } + }, + { + "ph": "s", "id": 285, "pid": 2338708, "tid": 2338708, "ts": 6345940159598.567, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940159600.647, "dur": 5.279, + "args": { + "External id": 986013,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940159604.916, "dur": 0.843, + "args": { + "External id": 986014,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940159610.878, "dur": 63.390, + "args": { + "External id": 986015,"Sequence number": 10552635, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940159612.434, "dur": 4.499, + "args": { + "External id": 986016,"Sequence number": 10552635, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940159613.516, "dur": 3.258, + "args": { + "External id": 986017,"Sequence number": 10552635, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18917 + } + }, + { + "ph": "s", "id": 284, "pid": 2338708, "tid": 2338708, "ts": 6345940159613.516, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940159617.836, "dur": 45.639, + "args": { + "External id": 986018,"Sequence number": 10552636, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18918 + } + }, + { + "ph": "s", "id": 283, "pid": 2338708, "tid": 2338708, "ts": 6345940159617.836, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940159665.562, "dur": 7.859, + "args": { + "External id": 986019,"Sequence number": 10552637, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18919 + } + }, + { + "ph": "s", "id": 282, "pid": 2338708, "tid": 2338708, "ts": 6345940159665.562, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940159687.117, "dur": 69.842, + "args": { + "External id": 986020,"Sequence number": 10552638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [1024, 4096], []], "Ev Idx": 18920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940159688.241, "dur": 5.245, + "args": { + "External id": 986021,"Sequence number": 10552638, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[1024, 4096]], "Ev Idx": 18921 + } + }, + { + "ph": "s", "id": 281, "pid": 2338708, "tid": 2338708, "ts": 6345940159688.241, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940159689.611, "dur": 2.583, + "args": { + "External id": 986022,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1024, 4096], [], []], "Ev Idx": 18922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940159691.227, "dur": 0.782, + "args": { + "External id": 986023,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1024]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1024, 4096], [], [], []], "Ev Idx": 18923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940159694.208, "dur": 62.520, + "args": { + "External id": 986024,"Sequence number": 10552639, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 1024]], "Ev Idx": 18924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940159698.401, "dur": 7.266, + "args": { + "External id": 986025,"Sequence number": 10552639, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940159699.347, "dur": 6.140, + "args": { + "External id": 986026,"Sequence number": 10552639, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18926 + } + }, + { + "ph": "s", "id": 280, "pid": 2338708, "tid": 2338708, "ts": 6345940159699.347, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940159706.288, "dur": 44.913, + "args": { + "External id": 986027,"Sequence number": 10552640, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 1024]], "Ev Idx": 18927 + } + }, + { + "ph": "s", "id": 279, "pid": 2338708, "tid": 2338708, "ts": 6345940159706.288, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940159753.150, "dur": 3.127, + "args": { + "External id": 986028,"Sequence number": 10552641, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 1024]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1024, 1], []], "Input Dims": [[32768, 1024], []], "Ev Idx": 18928 + } + }, + { + "ph": "s", "id": 278, "pid": 2338708, "tid": 2338708, "ts": 6345940159753.150, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940159778.221, "dur": 5.721, + "args": { + "External id": 986029,"Sequence number": 10552642, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940159779.783, "dur": 4.024, + "args": { + "External id": 986030,"Sequence number": 10552642, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 32, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18930 + } + }, + { + "ph": "s", "id": 277, "pid": 2338708, "tid": 2338708, "ts": 6345940159779.783, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940159792.733, "dur": 6.614, + "args": { + "External id": 986031,"Sequence number": 10552643, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940159794.212, "dur": 4.982, + "args": { + "External id": 986032,"Sequence number": 10552643, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18932 + } + }, + { + "ph": "s", "id": 276, "pid": 2338708, "tid": 2338708, "ts": 6345940159794.212, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940159804.434, "dur": 5.900, + "args": { + "External id": 986033,"Sequence number": 10552644, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940159805.495, "dur": 4.653, + "args": { + "External id": 986034,"Sequence number": 10552644, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 8, 128]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4194304, 1024, 1], []], "Input Dims": [[8, 4096, 1024], []], "Ev Idx": 18934 + } + }, + { + "ph": "s", "id": 275, "pid": 2338708, "tid": 2338708, "ts": 6345940159805.495, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940159843.534, "dur": 202.133, + "args": { + "External id": 986035,"Sequence number": 10552645, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18935 + } + }, + { + "ph": "s", "id": 274, "pid": 2338708, "tid": 2338708, "ts": 6345940159843.534, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940159865.998, "dur": 9.206, + "args": { + "External id": 986036,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940159868.981, "dur": 5.820, + "args": { + "External id": 986037,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RotaryEmbeddingFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940160102.171, "dur": 147.221, + "args": { + "External id": 986038,"Sequence number": 10552646, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "False", "False", "0", "", "4096"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[4194304, 1024, 128, 1], [64, 1], [64, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [4096, 64], [4096, 64], [], [], [], [], []], "Ev Idx": 18938 + } + }, + { + "ph": "s", "id": 273, "pid": 2338708, "tid": 2338708, "ts": 6345940160102.171, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940160122.461, "dur": 13.583, + "args": { + "External id": 986039,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4194304, 1024, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 8, 128], [], [], [], [], []], "Ev Idx": 18939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940160125.888, "dur": 9.472, + "args": { + "External id": 986040,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 8, 128]", "[4194304, 1024, 128, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FlashAttnFunc", "pid": 2338708, "tid": 2338708, + "ts": 6345940160285.936, "dur": 220.699, + "args": { + "External id": 986041,"Sequence number": 10552647, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "", "True", "", "0.", "", "False", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "", "Scalar", "", "Scalar", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], [], []], "Ev Idx": 18941 + } + }, + { + "ph": "s", "id": 272, "pid": 2338708, "tid": 2338708, "ts": 6345940160285.936, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345940160317.743, "dur": 154.695, + "args": { + "External id": 986042,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.088388347648318447", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[16777216, 4096, 128, 1], [4194304, 1024, 128, 1], [4194304, 1024, 128, 1], [], [], [], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [8, 4096, 8, 128], [8, 4096, 8, 128], [], [], [], [], [], [], [], []], "Ev Idx": 18942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940160376.638, "dur": 7.763, + "args": { + "External id": 986043,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[16777216, 4096, 128, 1], [], [], [], [], []], "Input Dims": [[8, 4096, 32, 128], [], [], [], [], []], "Ev Idx": 18943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940160379.637, "dur": 4.292, + "args": { + "External id": 986044,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 32, 128]", "[16777216, 4096, 128, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940160387.867, "dur": 4.955, + "args": { + "External id": 986045,"Record function id": 0, "Concrete Inputs": ["[8, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940160399.155, "dur": 1.468, + "args": { + "External id": 986046,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940160403.503, "dur": 7.082, + "args": { + "External id": 986047,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2338708, "tid": 2338708, + "ts": 6345940160486.880, "dur": 5.936, + "args": { + "External id": 986048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[16777216, 4096, 128, 1]], "Input Dims": [[8, 4096, 32, 128]], "Ev Idx": 18948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940160513.243, "dur": 7.825, + "args": { + "External id": 986049,"Sequence number": 10552648, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940160515.092, "dur": 5.804, + "args": { + "External id": 986050,"Sequence number": 10552648, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 128, 1], []], "Input Dims": [[8, 4096, 32, 128], []], "Ev Idx": 18950 + } + }, + { + "ph": "s", "id": 271, "pid": 2338708, "tid": 2338708, "ts": 6345940160515.092, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940160536.512, "dur": 127.838, + "args": { + "External id": 986051,"Sequence number": 10552649, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [4096, 4096], []], "Ev Idx": 18951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940160538.135, "dur": 12.613, + "args": { + "External id": 986052,"Sequence number": 10552649, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[4096, 4096]], "Ev Idx": 18952 + } + }, + { + "ph": "s", "id": 270, "pid": 2338708, "tid": 2338708, "ts": 6345940160538.135, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940160541.281, "dur": 8.030, + "args": { + "External id": 986053,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[4096, 4096], [], []], "Ev Idx": 18953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940160546.918, "dur": 2.012, + "args": { + "External id": 986054,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[4096, 4096], [], [], []], "Ev Idx": 18954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940160552.235, "dur": 111.761, + "args": { + "External id": 986055,"Sequence number": 10552650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 4096]], "Ev Idx": 18955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940160554.582, "dur": 4.469, + "args": { + "External id": 986056,"Sequence number": 10552650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940160555.804, "dur": 3.086, + "args": { + "External id": 986057,"Sequence number": 10552650, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18957 + } + }, + { + "ph": "s", "id": 269, "pid": 2338708, "tid": 2338708, "ts": 6345940160555.804, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940160562.861, "dur": 92.819, + "args": { + "External id": 986058,"Sequence number": 10552651, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 4096]], "Ev Idx": 18958 + } + }, + { + "ph": "s", "id": 268, "pid": 2338708, "tid": 2338708, "ts": 6345940160562.861, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940160658.610, "dur": 4.678, + "args": { + "External id": 986059,"Sequence number": 10552652, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18959 + } + }, + { + "ph": "s", "id": 267, "pid": 2338708, "tid": 2338708, "ts": 6345940160658.610, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "LayerNormFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940160708.867, "dur": 249.406, + "args": { + "External id": 986060,"Sequence number": 10552653, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "9.9999999999999995e-07", "True", "False", "True"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16777216, 4096, 1], [1], [], [16777216, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [4096], [], [8, 4096, 4096], [], [], [], []], "Ev Idx": 18960 + } + }, + { + "ph": "s", "id": 266, "pid": 2338708, "tid": 2338708, "ts": 6345940160708.867, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940160731.341, "dur": 5.726, + "args": { + "External id": 986061,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940160735.341, "dur": 1.476, + "args": { + "External id": 986062,"Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape_as", "pid": 2338708, "tid": 2338708, + "ts": 6345940160741.288, "dur": 3.527, + "args": { + "External id": 986063,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [4096, 1]], "Input Dims": [[8, 4096, 4096], [32768, 4096]], "Ev Idx": 18963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940160742.528, "dur": 2.140, + "args": { + "External id": 986064,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940160743.295, "dur": 1.262, + "args": { + "External id": 986065,"Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940160754.000, "dur": 9.123, + "args": { + "External id": 986066,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 18966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940160756.527, "dur": 6.234, + "args": { + "External id": 986067,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940160773.022, "dur": 6.033, + "args": { + "External id": 986068,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940160786.377, "dur": 4.597, + "args": { + "External id": 986069,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940160933.364, "dur": 4.412, + "args": { + "External id": 986070,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940160934.968, "dur": 2.425, + "args": { + "External id": 986071,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940160940.932, "dur": 2.575, + "args": { + "External id": 986072,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940160942.321, "dur": 1.072, + "args": { + "External id": 986073,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 18973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940160979.286, "dur": 184.755, + "args": { + "External id": 986074,"Sequence number": 10552654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940160980.291, "dur": 11.764, + "args": { + "External id": 986075,"Sequence number": 10552654, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18975 + } + }, + { + "ph": "s", "id": 265, "pid": 2338708, "tid": 2338708, "ts": 6345940160980.291, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940160985.348, "dur": 4.810, + "args": { + "External id": 986076,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940160987.723, "dur": 1.992, + "args": { + "External id": 986077,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940160993.178, "dur": 170.484, + "args": { + "External id": 986078,"Sequence number": 10552655, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940160994.775, "dur": 7.673, + "args": { + "External id": 986079,"Sequence number": 10552655, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940160998.724, "dur": 3.519, + "args": { + "External id": 986080,"Sequence number": 10552655, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18980 + } + }, + { + "ph": "s", "id": 264, "pid": 2338708, "tid": 2338708, "ts": 6345940160998.724, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940161003.261, "dur": 148.419, + "args": { + "External id": 986081,"Sequence number": 10552656, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18981 + } + }, + { + "ph": "s", "id": 263, "pid": 2338708, "tid": 2338708, "ts": 6345940161003.261, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940161156.310, "dur": 6.546, + "args": { + "External id": 986082,"Sequence number": 10552657, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18982 + } + }, + { + "ph": "s", "id": 262, "pid": 2338708, "tid": 2338708, "ts": 6345940161156.310, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940161178.674, "dur": 105.227, + "args": { + "External id": 986083,"Sequence number": 10552658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[16777216, 4096, 1], [4096, 1], []], "Input Dims": [[8, 4096, 4096], [14336, 4096], []], "Ev Idx": 18983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940161179.774, "dur": 13.787, + "args": { + "External id": 986084,"Sequence number": 10552658, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[14336, 4096]], "Ev Idx": 18984 + } + }, + { + "ph": "s", "id": 261, "pid": 2338708, "tid": 2338708, "ts": 6345940161179.774, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940161187.552, "dur": 4.176, + "args": { + "External id": 986085,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[14336, 4096], [], []], "Ev Idx": 18985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940161190.115, "dur": 1.435, + "args": { + "External id": 986086,"Record function id": 0, "Concrete Inputs": ["", "[4096, 14336]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[14336, 4096], [], [], []], "Ev Idx": 18986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940161194.445, "dur": 89.194, + "args": { + "External id": 986087,"Sequence number": 10552659, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[16777216, 4096, 1], [1, 4096]], "Input Dims": [[8, 4096, 4096], [4096, 14336]], "Ev Idx": 18987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940161196.021, "dur": 9.694, + "args": { + "External id": 986088,"Sequence number": 10552659, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940161197.019, "dur": 8.530, + "args": { + "External id": 986089,"Sequence number": 10552659, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[32768, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 18989 + } + }, + { + "ph": "s", "id": 260, "pid": 2338708, "tid": 2338708, "ts": 6345940161197.019, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940161206.464, "dur": 68.050, + "args": { + "External id": 986090,"Sequence number": 10552660, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[32768, 4096], [4096, 14336]], "Ev Idx": 18990 + } + }, + { + "ph": "s", "id": 259, "pid": 2338708, "tid": 2338708, "ts": 6345940161206.464, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940161277.155, "dur": 5.675, + "args": { + "External id": 986091,"Sequence number": 10552661, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[14336, 1], []], "Input Dims": [[32768, 14336], []], "Ev Idx": 18991 + } + }, + { + "ph": "s", "id": 258, "pid": 2338708, "tid": 2338708, "ts": 6345940161277.155, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "SwiGLULinearFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940161311.673, "dur": 192.328, + "args": { + "External id": 986092,"Sequence number": 10552662, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18992 + } + }, + { + "ph": "s", "id": 257, "pid": 2338708, "tid": 2338708, "ts": 6345940161311.673, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940161362.380, "dur": 6.232, + "args": { + "External id": 986093,"Record function id": 0, "Concrete Inputs": ["[8, 4096, 14336]", "15", "", "", "", "0"], "Input type": ["ScalarList", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 18993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940161407.963, "dur": 78.196, + "args": { + "External id": 986094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[58720256, 14336, 1], [14336, 1], []], "Input Dims": [[8, 4096, 14336], [4096, 14336], []], "Ev Idx": 18994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940161408.920, "dur": 9.339, + "args": { + "External id": 986095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[14336, 1]], "Input Dims": [[4096, 14336]], "Ev Idx": 18995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940161410.553, "dur": 6.452, + "args": { + "External id": 986096,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[14336, 1], [], []], "Input Dims": [[4096, 14336], [], []], "Ev Idx": 18996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940161415.648, "dur": 1.025, + "args": { + "External id": 986097,"Record function id": 0, "Concrete Inputs": ["", "[14336, 4096]", "[1, 14336]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[14336, 1], [], [], []], "Input Dims": [[4096, 14336], [], [], []], "Ev Idx": 18997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940161419.329, "dur": 66.388, + "args": { + "External id": 986098,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[58720256, 14336, 1], [1, 14336]], "Input Dims": [[8, 4096, 14336], [14336, 4096]], "Ev Idx": 18998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2338708, "tid": 2338708, + "ts": 6345940161421.166, "dur": 5.560, + "args": { + "External id": 986099,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 18999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940161425.228, "dur": 1.381, + "args": { + "External id": 986100,"Record function id": 0, "Concrete Inputs": ["", "[32768, 14336]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[58720256, 14336, 1], []], "Input Dims": [[8, 4096, 14336], []], "Ev Idx": 19000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940161427.475, "dur": 53.004, + "args": { + "External id": 986101,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[14336, 1], [1, 14336]], "Input Dims": [[32768, 14336], [14336, 4096]], "Ev Idx": 19001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_unsafe_view", "pid": 2338708, "tid": 2338708, + "ts": 6345940161483.554, "dur": 1.420, + "args": { + "External id": 986102,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345940161515.074, "dur": 29.007, + "args": { + "External id": 986103,"Sequence number": 10552663, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[16777216, 4096, 1], [16777216, 4096, 1], []], "Input Dims": [[8, 4096, 4096], [8, 4096, 4096], []], "Ev Idx": 19003 + } + }, + { + "ph": "s", "id": 256, "pid": 2338708, "tid": 2338708, "ts": 6345940161515.074, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338708, "tid": 2338708, + "ts": 6345940161566.706, "dur": 51.689, + "args": { + "External id": 986104,"Sequence number": 10552664, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "-2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 19004 + } + }, + { + "ph": "s", "id": 255, "pid": 2338708, "tid": 2338708, "ts": 6345940161566.706, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338708, "tid": 2338708, + "ts": 6345940161576.063, "dur": 36.692, + "args": { + "External id": 986105,"Record function id": 0, "Concrete Inputs": ["", "2"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1], [16777216, 4096, 1]], []], "Input Dims": [[[8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096], [8, 4096, 4096]], []], "Ev Idx": 19005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940161614.967, "dur": 1.514, + "args": { + "External id": 986106,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 16384], []], "Ev Idx": 19006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2338708, "tid": 2338708, + "ts": 6345940161660.455, "dur": 58.612, + "args": { + "External id": 986107,"Record function id": 0, "Ev Idx": 19007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2338708, "tid": 2338708, + "ts": 6345940161720.603, "dur": 231.745, + "args": { + "External id": 986108,"Record function id": 0, "Ev Idx": 19008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940161763.148, "dur": 178.731, + "args": { + "External id": 986109,"Sequence number": 10552665, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [67108864, 16384, 4096, 1]], "Input Dims": [[4096], [8, 4096, 4, 4096]], "Ev Idx": 19009 + } + }, + { + "ph": "s", "id": 254, "pid": 2338708, "tid": 2338708, "ts": 6345940161763.148, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2338708, "tid": 2338708, + "ts": 6345940161848.884, "dur": 45.144, + "args": { + "External id": 986110,"kernel_hash": "cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "4096", "1", "9.9999999999999995e-07", "True", "4096", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/m6/cm6zhwcmjp7qsfqrrblhk5z2l4yw5p27fdso3w5n3u2tffsek47b.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[131072, 4096], [131072, 4096], [4096], [131072], [], [], [], [], [], [], [], [], []], "Ev Idx": 19010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345940162135.706, "dur": 76.108, + "args": { + "External id": 986111,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940162140.027, "dur": 8.848, + "args": { + "External id": 986112,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162171.874, "dur": 39.561, + "args": { + "External id": 986113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162175.577, "dur": 35.174, + "args": { + "External id": 986114,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345940162218.230, "dur": 20.050, + "args": { + "External id": 986115,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940162219.538, "dur": 2.237, + "args": { + "External id": 986116,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162222.494, "dur": 15.474, + "args": { + "External id": 986117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162225.776, "dur": 11.633, + "args": { + "External id": 986118,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345940162241.648, "dur": 15.607, + "args": { + "External id": 986119,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False"], "Input type": ["ScalarList", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940162242.594, "dur": 2.092, + "args": { + "External id": 986120,"Record function id": 0, "Concrete Inputs": ["[1]", "", "", "", "False", ""], "Input type": ["ScalarList", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162245.258, "dur": 11.685, + "args": { + "External id": 986121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[1]], "Ev Idx": 19021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162245.964, "dur": 10.651, + "args": { + "External id": 986122,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 19022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940162270.643, "dur": 0.914, + "args": { + "External id": 986123,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[8, 8192], [], [], [], [], [], [], []], "Ev Idx": 19023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2338708, "tid": 2338708, + "ts": 6345940162281.213, "dur": 14.262, + "args": { + "External id": 986124,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "5", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 19024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162290.707, "dur": 2.453, + "args": { + "External id": 986125,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 8192], [], [], []], "Ev Idx": 19025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940162303.575, "dur": 8.958, + "args": { + "External id": 986126,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162309.270, "dur": 0.985, + "args": { + "External id": 986127,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940162313.935, "dur": 4.513, + "args": { + "External id": 986128,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162316.264, "dur": 0.556, + "args": { + "External id": 986129,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 5]", "[8192, 1, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940162320.283, "dur": 3.664, + "args": { + "External id": 986130,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 5], [], [], [], []], "Ev Idx": 19030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162322.846, "dur": 0.470, + "args": { + "External id": 986131,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 5], [], [], []], "Ev Idx": 19031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940162327.897, "dur": 5.839, + "args": { + "External id": 986132,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 19032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162330.260, "dur": 2.833, + "args": { + "External id": 986133,"Record function id": 0, "Concrete Inputs": ["", "[8, 8188, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 19033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940162335.062, "dur": 3.795, + "args": { + "External id": 986134,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 8188, 4], [], [], [], []], "Ev Idx": 19034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162337.324, "dur": 0.431, + "args": { + "External id": 986135,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 8188, 4], [], [], []], "Ev Idx": 19035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940162340.059, "dur": 2.968, + "args": { + "External id": 986136,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4096, 4], [], [], [], []], "Ev Idx": 19036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162341.976, "dur": 0.426, + "args": { + "External id": 986137,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 19037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940162348.201, "dur": 7.013, + "args": { + "External id": 986138,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4096, 4], [], []], "Ev Idx": 19038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162353.662, "dur": 0.536, + "args": { + "External id": 986139,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4096, 4], [], [], []], "Ev Idx": 19039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940162360.538, "dur": 3.183, + "args": { + "External id": 986140,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162362.558, "dur": 0.464, + "args": { + "External id": 986141,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345940162367.665, "dur": 12.144, + "args": { + "External id": 986142,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162377.923, "dur": 0.653, + "args": { + "External id": 986143,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940162381.049, "dur": 2.612, + "args": { + "External id": 986144,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162382.614, "dur": 0.389, + "args": { + "External id": 986145,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940162386.512, "dur": 7.565, + "args": { + "External id": 986146,"Sequence number": 10552666, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19046 + } + }, + { + "ph": "s", "id": 253, "pid": 2338708, "tid": 2338708, "ts": 6345940162386.512, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162391.356, "dur": 0.667, + "args": { + "External id": 986147,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940162397.795, "dur": 7.450, + "args": { + "External id": 986148,"Sequence number": 10552667, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19048 + } + }, + { + "ph": "s", "id": 252, "pid": 2338708, "tid": 2338708, "ts": 6345940162397.795, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162401.329, "dur": 2.960, + "args": { + "External id": 986149,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345940162406.519, "dur": 5.840, + "args": { + "External id": 986150,"Sequence number": 10552668, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19050 + } + }, + { + "ph": "s", "id": 251, "pid": 2338708, "tid": 2338708, "ts": 6345940162406.519, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162410.804, "dur": 0.563, + "args": { + "External id": 986151,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940162413.589, "dur": 7.613, + "args": { + "External id": 986152,"Sequence number": 10552669, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19052 + } + }, + { + "ph": "s", "id": 250, "pid": 2338708, "tid": 2338708, "ts": 6345940162413.589, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162419.715, "dur": 0.541, + "args": { + "External id": 986153,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345940162426.203, "dur": 46.356, + "args": { + "External id": 986154,"Sequence number": 10552670, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345940162430.957, "dur": 41.155, + "args": { + "External id": 986155,"Sequence number": 10552670, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940162434.271, "dur": 6.966, + "args": { + "External id": 986156,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940162436.200, "dur": 4.421, + "args": { + "External id": 986157,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162442.982, "dur": 28.554, + "args": { + "External id": 986158,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940162506.353, "dur": 5.484, + "args": { + "External id": 986159,"Sequence number": 10552670, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19059 + } + }, + { + "ph": "s", "id": 249, "pid": 2338708, "tid": 2338708, "ts": 6345940162506.353, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940162514.683, "dur": 1.539, + "args": { + "External id": 986160,"Sequence number": 10552671, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940162552.549, "dur": 123016.544, + "args": { + "External id": 986161,"Sequence number": 10552671, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19061 + } + }, + { + "ph": "s", "id": 248, "pid": 2338708, "tid": 2338708, "ts": 6345940162552.549, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345940162570.202, "dur": 36.392, + "args": { + "External id": 986162,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345940162570.805, "dur": 35.518, + "args": { + "External id": 986163,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940162572.756, "dur": 7.443, + "args": { + "External id": 986164,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940162576.398, "dur": 3.305, + "args": { + "External id": 986165,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162581.071, "dur": 24.708, + "args": { + "External id": 986166,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940162628.022, "dur": 29.377, + "args": { + "External id": 986167,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940162629.433, "dur": 7.372, + "args": { + "External id": 986168,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162632.176, "dur": 4.053, + "args": { + "External id": 986169,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162637.778, "dur": 19.365, + "args": { + "External id": 986170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162638.802, "dur": 17.841, + "args": { + "External id": 986171,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940162661.519, "dur": 23.963, + "args": { + "External id": 986172,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940162662.636, "dur": 9.714, + "args": { + "External id": 986173,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162666.523, "dur": 5.551, + "args": { + "External id": 986174,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162673.069, "dur": 12.173, + "args": { + "External id": 986175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162673.656, "dur": 11.174, + "args": { + "External id": 986176,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345940162690.910, "dur": 18.334, + "args": { + "External id": 986177,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940162692.086, "dur": 4.462, + "args": { + "External id": 986178,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162697.272, "dur": 11.660, + "args": { + "External id": 986179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162697.963, "dur": 10.600, + "args": { + "External id": 986180,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2338708, + "ts": 6345940162721.460, "dur": 39.323, + "args": { + "External id": 986181,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940162767.054, "dur": 76.204, + "args": { + "External id": 986182,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940162771.205, "dur": 71.385, + "args": { + "External id": 986183,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162782.020, "dur": 0.801, + "args": { + "External id": 986184,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940162787.054, "dur": 31.044, + "args": { + "External id": 986185,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940162789.134, "dur": 28.719, + "args": { + "External id": 986186,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940162792.496, "dur": 3.630, + "args": { + "External id": 986187,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940162797.176, "dur": 20.122, + "args": { + "External id": 986188,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2338708, + "ts": 6345940162851.177, "dur": 115458.593, + "args": { + "External id": 986189,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2338708, + "ts": 6345940162853.677, "dur": 115453.567, + "args": { + "External id": 986190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940278330.573, "dur": 13.726, + "args": { + "External id": 986191,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940278339.382, "dur": 1.967, + "args": { + "External id": 986192,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940278351.504, "dur": 135.834, + "args": { + "External id": 986193,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940278353.913, "dur": 9.755, + "args": { + "External id": 986194,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940278357.395, "dur": 4.982, + "args": { + "External id": 986195,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940278360.767, "dur": 1.299, + "args": { + "External id": 986196,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940278365.210, "dur": 121.251, + "args": { + "External id": 986197,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940278367.439, "dur": 117.777, + "args": { + "External id": 986198,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940278495.447, "dur": 5.727, + "args": { + "External id": 986199,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940278498.355, "dur": 1.242, + "args": { + "External id": 986200,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940278515.023, "dur": 6.928, + "args": { + "External id": 986201,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940278537.191, "dur": 12.014, + "args": { + "External id": 986202,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940278540.744, "dur": 8.104, + "args": { + "External id": 986203,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940278740.804, "dur": 372.446, + "args": { + "External id": 986204,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940278746.721, "dur": 2.185, + "args": { + "External id": 986205,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940278752.077, "dur": 360.431, + "args": { + "External id": 986206,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940278756.236, "dur": 1.380, + "args": { + "External id": 986207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940278761.260, "dur": 41.338, + "args": { + "External id": 986208,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940278806.175, "dur": 4.762, + "args": { + "External id": 986209,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940278809.600, "dur": 0.883, + "args": { + "External id": 986210,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940278813.968, "dur": 41.373, + "args": { + "External id": 986211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940278816.112, "dur": 1.425, + "args": { + "External id": 986212,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940278820.540, "dur": 34.409, + "args": { + "External id": 986213,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940278831.887, "dur": 4.649, + "args": { + "External id": 986214,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940278858.066, "dur": 32.480, + "args": { + "External id": 986215,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940278894.357, "dur": 24.640, + "args": { + "External id": 986216,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940278924.027, "dur": 23.045, + "args": { + "External id": 986217,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940278950.003, "dur": 20.936, + "args": { + "External id": 986218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940278973.662, "dur": 27.775, + "args": { + "External id": 986219,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940278976.668, "dur": 2.198, + "args": { + "External id": 986220,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940278981.610, "dur": 0.905, + "args": { + "External id": 986221,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940279006.535, "dur": 39.061, + "args": { + "External id": 986222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940279049.060, "dur": 60.671, + "args": { + "External id": 986223,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940279126.163, "dur": 3.557, + "args": { + "External id": 986224,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940279140.432, "dur": 5.388, + "args": { + "External id": 986225,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940279144.006, "dur": 0.642, + "args": { + "External id": 986226,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940279260.427, "dur": 92.406, + "args": { + "External id": 986227,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940279360.127, "dur": 12.010, + "args": { + "External id": 986228,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940279366.175, "dur": 4.313, + "args": { + "External id": 986229,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940279374.211, "dur": 41.797, + "args": { + "External id": 986230,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940279423.051, "dur": 8.295, + "args": { + "External id": 986231,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940279425.610, "dur": 4.816, + "args": { + "External id": 986232,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940279428.810, "dur": 1.337, + "args": { + "External id": 986233,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940279436.810, "dur": 58.092, + "args": { + "External id": 986234,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940279438.468, "dur": 55.793, + "args": { + "External id": 986235,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940279504.560, "dur": 22.916, + "args": { + "External id": 986236,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940279535.782, "dur": 5.280, + "args": { + "External id": 986237,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940279539.024, "dur": 0.783, + "args": { + "External id": 986238,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940279546.754, "dur": 58.408, + "args": { + "External id": 986239,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940279548.019, "dur": 9.735, + "args": { + "External id": 986240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940279548.834, "dur": 8.158, + "args": { + "External id": 986241,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940279553.424, "dur": 3.301, + "args": { + "External id": 986242,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940279558.378, "dur": 46.304, + "args": { + "External id": 986243,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940279558.933, "dur": 44.915, + "args": { + "External id": 986244,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940279610.584, "dur": 4.343, + "args": { + "External id": 986245,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940279612.901, "dur": 0.532, + "args": { + "External id": 986246,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940279623.079, "dur": 2.247, + "args": { + "External id": 986247,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940279635.281, "dur": 10.400, + "args": { + "External id": 986248,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940279640.421, "dur": 4.881, + "args": { + "External id": 986249,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940279768.491, "dur": 218.511, + "args": { + "External id": 986250,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940279771.897, "dur": 2.102, + "args": { + "External id": 986251,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940279775.688, "dur": 210.753, + "args": { + "External id": 986252,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940279777.651, "dur": 0.503, + "args": { + "External id": 986253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940279782.135, "dur": 25.148, + "args": { + "External id": 986254,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940279809.346, "dur": 7.155, + "args": { + "External id": 986255,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940279812.342, "dur": 3.809, + "args": { + "External id": 986256,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940279820.510, "dur": 27.323, + "args": { + "External id": 986257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940279821.488, "dur": 3.920, + "args": { + "External id": 986258,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940279827.245, "dur": 20.258, + "args": { + "External id": 986259,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940279830.814, "dur": 3.008, + "args": { + "External id": 986260,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940279849.643, "dur": 24.412, + "args": { + "External id": 986261,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940279876.256, "dur": 16.575, + "args": { + "External id": 986262,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940279896.029, "dur": 14.838, + "args": { + "External id": 986263,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940279912.677, "dur": 14.895, + "args": { + "External id": 986264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940279930.101, "dur": 26.506, + "args": { + "External id": 986265,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940279932.609, "dur": 2.563, + "args": { + "External id": 986266,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940279940.104, "dur": 0.585, + "args": { + "External id": 986267,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940279958.259, "dur": 14.083, + "args": { + "External id": 986268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940279973.824, "dur": 11.226, + "args": { + "External id": 986269,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940279995.285, "dur": 2.597, + "args": { + "External id": 986270,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940280031.278, "dur": 6.144, + "args": { + "External id": 986271,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940280034.956, "dur": 0.919, + "args": { + "External id": 986272,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940280182.156, "dur": 81.762, + "args": { + "External id": 986273,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940280271.399, "dur": 10.508, + "args": { + "External id": 986274,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940280278.489, "dur": 1.496, + "args": { + "External id": 986275,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940280283.716, "dur": 30.291, + "args": { + "External id": 986276,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940280320.526, "dur": 10.124, + "args": { + "External id": 986277,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940280322.498, "dur": 7.126, + "args": { + "External id": 986278,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940280325.330, "dur": 3.963, + "args": { + "External id": 986279,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940280334.358, "dur": 57.528, + "args": { + "External id": 986280,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940280338.169, "dur": 52.757, + "args": { + "External id": 986281,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940280397.629, "dur": 18.841, + "args": { + "External id": 986282,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940280425.036, "dur": 9.583, + "args": { + "External id": 986283,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940280432.585, "dur": 0.784, + "args": { + "External id": 986284,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940280440.352, "dur": 67.934, + "args": { + "External id": 986285,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940280441.383, "dur": 6.631, + "args": { + "External id": 986286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940280444.529, "dur": 2.709, + "args": { + "External id": 986287,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940280446.210, "dur": 0.792, + "args": { + "External id": 986288,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940280448.869, "dur": 58.867, + "args": { + "External id": 986289,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940280449.583, "dur": 57.436, + "args": { + "External id": 986290,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940280513.617, "dur": 8.139, + "args": { + "External id": 986291,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940280516.730, "dur": 3.452, + "args": { + "External id": 986292,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940280530.504, "dur": 2.098, + "args": { + "External id": 986293,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940280546.031, "dur": 10.046, + "args": { + "External id": 986294,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940280548.554, "dur": 7.122, + "args": { + "External id": 986295,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940280671.767, "dur": 222.841, + "args": { + "External id": 986296,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940280674.213, "dur": 2.288, + "args": { + "External id": 986297,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940280679.049, "dur": 214.667, + "args": { + "External id": 986298,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940280681.111, "dur": 0.446, + "args": { + "External id": 986299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940280685.803, "dur": 27.928, + "args": { + "External id": 986300,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940280716.123, "dur": 3.991, + "args": { + "External id": 986301,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940280718.901, "dur": 0.928, + "args": { + "External id": 986302,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940280721.329, "dur": 28.187, + "args": { + "External id": 986303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940280722.996, "dur": 1.975, + "args": { + "External id": 986304,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940280726.882, "dur": 22.247, + "args": { + "External id": 986305,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940280732.440, "dur": 3.467, + "args": { + "External id": 986306,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940280751.412, "dur": 27.554, + "args": { + "External id": 986307,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940280781.251, "dur": 16.108, + "args": { + "External id": 986308,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940280801.643, "dur": 16.757, + "args": { + "External id": 986309,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940280822.883, "dur": 14.659, + "args": { + "External id": 986310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940280839.711, "dur": 24.012, + "args": { + "External id": 986311,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940280842.536, "dur": 2.140, + "args": { + "External id": 986312,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940280846.938, "dur": 0.974, + "args": { + "External id": 986313,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940280865.448, "dur": 14.061, + "args": { + "External id": 986314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940280880.779, "dur": 11.737, + "args": { + "External id": 986315,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940280906.038, "dur": 2.128, + "args": { + "External id": 986316,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940280919.666, "dur": 4.866, + "args": { + "External id": 986317,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940280922.883, "dur": 0.639, + "args": { + "External id": 986318,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940281005.705, "dur": 136.889, + "args": { + "External id": 986319,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940281151.466, "dur": 8.505, + "args": { + "External id": 986320,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940281156.444, "dur": 1.697, + "args": { + "External id": 986321,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940281161.900, "dur": 32.654, + "args": { + "External id": 986322,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940281201.939, "dur": 8.783, + "args": { + "External id": 986323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940281206.458, "dur": 3.430, + "args": { + "External id": 986324,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940281208.613, "dur": 0.950, + "args": { + "External id": 986325,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940281214.607, "dur": 53.283, + "args": { + "External id": 986326,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940281216.207, "dur": 50.900, + "args": { + "External id": 986327,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940281272.953, "dur": 20.183, + "args": { + "External id": 986328,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940281301.036, "dur": 8.141, + "args": { + "External id": 986329,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940281307.105, "dur": 0.786, + "args": { + "External id": 986330,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940281314.222, "dur": 57.426, + "args": { + "External id": 986331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940281315.416, "dur": 6.911, + "args": { + "External id": 986332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940281316.556, "dur": 4.975, + "args": { + "External id": 986333,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940281318.349, "dur": 2.964, + "args": { + "External id": 986334,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940281323.049, "dur": 47.981, + "args": { + "External id": 986335,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940281323.927, "dur": 46.399, + "args": { + "External id": 986336,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940281379.316, "dur": 5.416, + "args": { + "External id": 986337,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940281382.237, "dur": 0.849, + "args": { + "External id": 986338,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940281392.881, "dur": 1.767, + "args": { + "External id": 986339,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940281405.852, "dur": 7.533, + "args": { + "External id": 986340,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940281408.148, "dur": 4.877, + "args": { + "External id": 986341,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940281531.360, "dur": 235.495, + "args": { + "External id": 986342,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940281534.235, "dur": 5.064, + "args": { + "External id": 986343,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940281541.183, "dur": 225.257, + "args": { + "External id": 986344,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940281543.004, "dur": 0.664, + "args": { + "External id": 986345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940281547.795, "dur": 27.614, + "args": { + "External id": 986346,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940281577.429, "dur": 3.540, + "args": { + "External id": 986347,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940281579.953, "dur": 0.609, + "args": { + "External id": 986348,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940281582.051, "dur": 31.923, + "args": { + "External id": 986349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940281584.013, "dur": 3.303, + "args": { + "External id": 986350,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940281589.035, "dur": 24.525, + "args": { + "External id": 986351,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940281595.350, "dur": 3.350, + "args": { + "External id": 986352,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940281615.729, "dur": 24.360, + "args": { + "External id": 986353,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940281642.125, "dur": 23.116, + "args": { + "External id": 986354,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940281668.420, "dur": 16.673, + "args": { + "External id": 986355,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940281687.126, "dur": 16.093, + "args": { + "External id": 986356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940281705.298, "dur": 25.535, + "args": { + "External id": 986357,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940281708.172, "dur": 1.983, + "args": { + "External id": 986358,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940281712.860, "dur": 1.054, + "args": { + "External id": 986359,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940281735.131, "dur": 15.246, + "args": { + "External id": 986360,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940281751.731, "dur": 13.396, + "args": { + "External id": 986361,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940281774.914, "dur": 2.537, + "args": { + "External id": 986362,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940281788.371, "dur": 3.986, + "args": { + "External id": 986363,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940281790.771, "dur": 0.600, + "args": { + "External id": 986364,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940281872.011, "dur": 58.152, + "args": { + "External id": 986365,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940281935.874, "dur": 5.726, + "args": { + "External id": 986366,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940281939.348, "dur": 0.805, + "args": { + "External id": 986367,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940281943.129, "dur": 27.951, + "args": { + "External id": 986368,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940281978.878, "dur": 8.463, + "args": { + "External id": 986369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940281980.309, "dur": 6.196, + "args": { + "External id": 986370,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940281982.727, "dur": 3.558, + "args": { + "External id": 986371,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940281990.251, "dur": 112.544, + "args": { + "External id": 986372,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940281991.634, "dur": 109.781, + "args": { + "External id": 986373,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940282110.036, "dur": 21.241, + "args": { + "External id": 986374,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940282139.689, "dur": 9.591, + "args": { + "External id": 986375,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940282146.379, "dur": 1.383, + "args": { + "External id": 986376,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940282154.529, "dur": 68.061, + "args": { + "External id": 986377,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940282155.896, "dur": 4.958, + "args": { + "External id": 986378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940282157.068, "dur": 2.920, + "args": { + "External id": 986379,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940282158.899, "dur": 0.730, + "args": { + "External id": 986380,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940282161.729, "dur": 60.274, + "args": { + "External id": 986381,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940282165.421, "dur": 55.761, + "args": { + "External id": 986382,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940282227.549, "dur": 5.688, + "args": { + "External id": 986383,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940282230.782, "dur": 0.849, + "args": { + "External id": 986384,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940282241.545, "dur": 2.148, + "args": { + "External id": 986385,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940282253.182, "dur": 8.211, + "args": { + "External id": 986386,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940282255.714, "dur": 5.394, + "args": { + "External id": 986387,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940282373.291, "dur": 226.306, + "args": { + "External id": 986388,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940282375.541, "dur": 2.462, + "args": { + "External id": 986389,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940282382.785, "dur": 216.269, + "args": { + "External id": 986390,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940282384.262, "dur": 0.564, + "args": { + "External id": 986391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940282386.345, "dur": 26.518, + "args": { + "External id": 986392,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940282414.875, "dur": 6.214, + "args": { + "External id": 986393,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940282417.500, "dur": 3.241, + "args": { + "External id": 986394,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940282422.265, "dur": 28.907, + "args": { + "External id": 986395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940282423.561, "dur": 1.480, + "args": { + "External id": 986396,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940282429.226, "dur": 21.581, + "args": { + "External id": 986397,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940282432.333, "dur": 3.994, + "args": { + "External id": 986398,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940282452.941, "dur": 27.582, + "args": { + "External id": 986399,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940282482.214, "dur": 16.816, + "args": { + "External id": 986400,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940282502.623, "dur": 17.157, + "args": { + "External id": 986401,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940282521.340, "dur": 15.715, + "args": { + "External id": 986402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940282539.586, "dur": 26.762, + "args": { + "External id": 986403,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940282542.400, "dur": 1.699, + "args": { + "External id": 986404,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940282548.421, "dur": 1.058, + "args": { + "External id": 986405,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940282567.902, "dur": 15.031, + "args": { + "External id": 986406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940282584.332, "dur": 13.674, + "args": { + "External id": 986407,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940282607.645, "dur": 2.220, + "args": { + "External id": 986408,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940282620.167, "dur": 4.115, + "args": { + "External id": 986409,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940282622.877, "dur": 0.393, + "args": { + "External id": 986410,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940282700.064, "dur": 58.356, + "args": { + "External id": 986411,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940282769.782, "dur": 11.013, + "args": { + "External id": 986412,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940282776.016, "dur": 3.257, + "args": { + "External id": 986413,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940282782.424, "dur": 28.653, + "args": { + "External id": 986414,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940282816.385, "dur": 5.843, + "args": { + "External id": 986415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940282817.819, "dur": 3.577, + "args": { + "External id": 986416,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940282819.984, "dur": 1.055, + "args": { + "External id": 986417,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940282825.029, "dur": 52.801, + "args": { + "External id": 986418,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940282828.864, "dur": 47.953, + "args": { + "External id": 986419,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940282882.526, "dur": 18.006, + "args": { + "External id": 986420,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940282907.146, "dur": 4.985, + "args": { + "External id": 986421,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940282910.109, "dur": 0.813, + "args": { + "External id": 986422,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940282916.155, "dur": 57.721, + "args": { + "External id": 986423,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940282917.204, "dur": 6.687, + "args": { + "External id": 986424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940282917.979, "dur": 5.236, + "args": { + "External id": 986425,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940282922.342, "dur": 0.722, + "args": { + "External id": 986426,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940282924.515, "dur": 48.856, + "args": { + "External id": 986427,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940282927.875, "dur": 44.786, + "args": { + "External id": 986428,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940282978.635, "dur": 4.410, + "args": { + "External id": 986429,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940282980.951, "dur": 0.729, + "args": { + "External id": 986430,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940282989.171, "dur": 1.840, + "args": { + "External id": 986431,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940283003.291, "dur": 31.758, + "args": { + "External id": 986432,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940283005.771, "dur": 28.494, + "args": { + "External id": 986433,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940283192.523, "dur": 236.403, + "args": { + "External id": 986434,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940283195.905, "dur": 3.121, + "args": { + "External id": 986435,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940283201.413, "dur": 227.041, + "args": { + "External id": 986436,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940283203.616, "dur": 0.676, + "args": { + "External id": 986437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940283205.662, "dur": 33.055, + "args": { + "External id": 986438,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940283243.424, "dur": 4.881, + "args": { + "External id": 986439,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940283246.420, "dur": 1.380, + "args": { + "External id": 986440,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940283249.702, "dur": 29.164, + "args": { + "External id": 986441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940283251.023, "dur": 1.529, + "args": { + "External id": 986442,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940283254.461, "dur": 24.018, + "args": { + "External id": 986443,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940283260.085, "dur": 2.654, + "args": { + "External id": 986444,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940283280.755, "dur": 25.747, + "args": { + "External id": 986445,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940283308.558, "dur": 17.578, + "args": { + "External id": 986446,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940283329.932, "dur": 17.320, + "args": { + "External id": 986447,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940283348.986, "dur": 16.989, + "args": { + "External id": 986448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940283368.112, "dur": 27.935, + "args": { + "External id": 986449,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940283373.332, "dur": 1.883, + "args": { + "External id": 986450,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940283378.079, "dur": 0.680, + "args": { + "External id": 986451,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940283397.579, "dur": 14.971, + "args": { + "External id": 986452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940283414.063, "dur": 12.690, + "args": { + "External id": 986453,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940283438.475, "dur": 2.658, + "args": { + "External id": 986454,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940283452.293, "dur": 4.699, + "args": { + "External id": 986455,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940283455.393, "dur": 0.630, + "args": { + "External id": 986456,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940283541.800, "dur": 74.003, + "args": { + "External id": 986457,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940283621.861, "dur": 5.578, + "args": { + "External id": 986458,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940283625.296, "dur": 0.696, + "args": { + "External id": 986459,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940283629.623, "dur": 30.785, + "args": { + "External id": 986460,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940283665.970, "dur": 13.811, + "args": { + "External id": 986461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940283667.707, "dur": 11.070, + "args": { + "External id": 986462,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940283677.540, "dur": 0.990, + "args": { + "External id": 986463,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940283683.184, "dur": 52.868, + "args": { + "External id": 986464,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940283684.677, "dur": 50.615, + "args": { + "External id": 986465,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940283741.120, "dur": 18.769, + "args": { + "External id": 986466,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940283766.962, "dur": 5.242, + "args": { + "External id": 986467,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940283770.350, "dur": 0.660, + "args": { + "External id": 986468,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940283777.412, "dur": 61.935, + "args": { + "External id": 986469,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940283781.085, "dur": 7.374, + "args": { + "External id": 986470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940283782.270, "dur": 5.430, + "args": { + "External id": 986471,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940283784.288, "dur": 3.197, + "args": { + "External id": 986472,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940283789.257, "dur": 49.609, + "args": { + "External id": 986473,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940283789.977, "dur": 48.275, + "args": { + "External id": 986474,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940283844.419, "dur": 5.441, + "args": { + "External id": 986475,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940283847.567, "dur": 0.906, + "args": { + "External id": 986476,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940283859.245, "dur": 1.769, + "args": { + "External id": 986477,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940283870.677, "dur": 6.793, + "args": { + "External id": 986478,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940283873.101, "dur": 4.057, + "args": { + "External id": 986479,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940283981.775, "dur": 300.039, + "args": { + "External id": 986480,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940283984.466, "dur": 1.748, + "args": { + "External id": 986481,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940283988.502, "dur": 292.608, + "args": { + "External id": 986482,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940283992.756, "dur": 0.528, + "args": { + "External id": 986483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940283996.907, "dur": 46.397, + "args": { + "External id": 986484,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940284046.514, "dur": 3.714, + "args": { + "External id": 986485,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940284048.942, "dur": 0.955, + "args": { + "External id": 986486,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940284051.444, "dur": 71.633, + "args": { + "External id": 986487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940284053.127, "dur": 42.028, + "args": { + "External id": 986488,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940284098.353, "dur": 24.409, + "args": { + "External id": 986489,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940284102.093, "dur": 3.705, + "args": { + "External id": 986490,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940284124.670, "dur": 31.753, + "args": { + "External id": 986491,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940284158.608, "dur": 16.852, + "args": { + "External id": 986492,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940284181.534, "dur": 17.617, + "args": { + "External id": 986493,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940284200.801, "dur": 16.261, + "args": { + "External id": 986494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940284219.277, "dur": 25.744, + "args": { + "External id": 986495,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940284221.962, "dur": 2.003, + "args": { + "External id": 986496,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940284226.521, "dur": 0.667, + "args": { + "External id": 986497,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940284246.862, "dur": 15.316, + "args": { + "External id": 986498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940284266.315, "dur": 13.279, + "args": { + "External id": 986499,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940284291.947, "dur": 3.163, + "args": { + "External id": 986500,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940284307.068, "dur": 4.616, + "args": { + "External id": 986501,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940284310.032, "dur": 0.611, + "args": { + "External id": 986502,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940284402.580, "dur": 70.190, + "args": { + "External id": 986503,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940284479.034, "dur": 5.946, + "args": { + "External id": 986504,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940284482.712, "dur": 0.821, + "args": { + "External id": 986505,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940284486.740, "dur": 28.642, + "args": { + "External id": 986506,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940284524.343, "dur": 8.410, + "args": { + "External id": 986507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940284526.026, "dur": 5.898, + "args": { + "External id": 986508,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940284528.324, "dur": 3.226, + "args": { + "External id": 986509,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940284536.116, "dur": 50.750, + "args": { + "External id": 986510,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940284537.336, "dur": 48.524, + "args": { + "External id": 986511,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940284591.687, "dur": 19.041, + "args": { + "External id": 986512,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940284616.916, "dur": 7.648, + "args": { + "External id": 986513,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940284622.572, "dur": 0.852, + "args": { + "External id": 986514,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940284629.699, "dur": 56.366, + "args": { + "External id": 986515,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940284630.621, "dur": 4.817, + "args": { + "External id": 986516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940284631.671, "dur": 3.081, + "args": { + "External id": 986517,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940284633.316, "dur": 1.291, + "args": { + "External id": 986518,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940284636.257, "dur": 49.306, + "args": { + "External id": 986519,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940284639.982, "dur": 44.884, + "args": { + "External id": 986520,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940284690.699, "dur": 4.545, + "args": { + "External id": 986521,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940284693.417, "dur": 0.542, + "args": { + "External id": 986522,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940284701.699, "dur": 1.644, + "args": { + "External id": 986523,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940284713.210, "dur": 7.314, + "args": { + "External id": 986524,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940284715.696, "dur": 4.500, + "args": { + "External id": 986525,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940284821.914, "dur": 280.876, + "args": { + "External id": 986526,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940284824.526, "dur": 5.239, + "args": { + "External id": 986527,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940284833.792, "dur": 268.497, + "args": { + "External id": 986528,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940284835.863, "dur": 0.376, + "args": { + "External id": 986529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940284837.390, "dur": 24.452, + "args": { + "External id": 986530,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940284863.668, "dur": 9.030, + "args": { + "External id": 986531,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940284869.310, "dur": 3.127, + "args": { + "External id": 986532,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940284873.910, "dur": 27.950, + "args": { + "External id": 986533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940284877.790, "dur": 1.347, + "args": { + "External id": 986534,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940284880.551, "dur": 20.906, + "args": { + "External id": 986535,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940284883.704, "dur": 2.446, + "args": { + "External id": 986536,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940284903.285, "dur": 23.623, + "args": { + "External id": 986537,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940284928.625, "dur": 17.347, + "args": { + "External id": 986538,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940284948.756, "dur": 15.061, + "args": { + "External id": 986539,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940284965.177, "dur": 14.770, + "args": { + "External id": 986540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940284981.737, "dur": 43.758, + "args": { + "External id": 986541,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940284983.995, "dur": 1.714, + "args": { + "External id": 986542,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940284990.348, "dur": 0.926, + "args": { + "External id": 986543,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285028.413, "dur": 18.106, + "args": { + "External id": 986544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285047.877, "dur": 51.309, + "args": { + "External id": 986545,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940285114.124, "dur": 2.974, + "args": { + "External id": 986546,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940285129.084, "dur": 4.842, + "args": { + "External id": 986547,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285132.125, "dur": 0.601, + "args": { + "External id": 986548,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940285219.600, "dur": 69.965, + "args": { + "External id": 986549,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940285295.698, "dur": 11.465, + "args": { + "External id": 986550,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285301.925, "dur": 3.759, + "args": { + "External id": 986551,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285308.718, "dur": 30.315, + "args": { + "External id": 986552,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940285344.545, "dur": 6.719, + "args": { + "External id": 986553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940285346.511, "dur": 3.660, + "args": { + "External id": 986554,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285349.120, "dur": 0.760, + "args": { + "External id": 986555,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940285354.282, "dur": 50.194, + "args": { + "External id": 986556,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940285355.694, "dur": 48.144, + "args": { + "External id": 986557,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285411.475, "dur": 20.148, + "args": { + "External id": 986558,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940285437.651, "dur": 30.971, + "args": { + "External id": 986559,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940285440.937, "dur": 27.241, + "args": { + "External id": 986560,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285447.018, "dur": 1.027, + "args": { + "External id": 986561,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940285475.496, "dur": 36.824, + "args": { + "External id": 986562,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940285477.846, "dur": 34.179, + "args": { + "External id": 986563,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 19463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285483.642, "dur": 5.507, + "args": { + "External id": 986564,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285490.619, "dur": 20.847, + "args": { + "External id": 986565,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940285531.247, "dur": 6.508, + "args": { + "External id": 986566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940285534.086, "dur": 3.360, + "args": { + "External id": 986567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940285539.154, "dur": 1.706, + "args": { + "External id": 986568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940285540.096, "dur": 0.686, + "args": { + "External id": 986569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285589.562, "dur": 28.449, + "args": { + "External id": 986570,"Sequence number": 10552672, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19470 + } + }, + { + "ph": "s", "id": 247, "pid": 2338708, "tid": 2338708, "ts": 6345940285589.562, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940285625.215, "dur": 7.337, + "args": { + "External id": 986571,"Sequence number": 10552673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285629.510, "dur": 1.348, + "args": { + "External id": 986572,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345940285638.180, "dur": 7.018, + "args": { + "External id": 986573,"Sequence number": 10552673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285643.138, "dur": 0.669, + "args": { + "External id": 986574,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940285646.828, "dur": 3.483, + "args": { + "External id": 986575,"Sequence number": 10552673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285648.896, "dur": 0.630, + "args": { + "External id": 986576,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "2"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940285655.405, "dur": 9.675, + "args": { + "External id": 986577,"Sequence number": 10552673, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19477 + } + }, + { + "ph": "s", "id": 246, "pid": 2338708, "tid": 2338708, "ts": 6345940285655.405, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285662.198, "dur": 1.027, + "args": { + "External id": 986578,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940285666.536, "dur": 6.056, + "args": { + "External id": 986579,"Sequence number": 10552674, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19479 + } + }, + { + "ph": "s", "id": 245, "pid": 2338708, "tid": 2338708, "ts": 6345940285666.536, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285671.205, "dur": 0.326, + "args": { + "External id": 986580,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345940285674.151, "dur": 8.922, + "args": { + "External id": 986581,"Sequence number": 10552675, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19481 + } + }, + { + "ph": "s", "id": 244, "pid": 2338708, "tid": 2338708, "ts": 6345940285674.151, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285681.407, "dur": 0.573, + "args": { + "External id": 986582,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940285684.637, "dur": 8.032, + "args": { + "External id": 986583,"Sequence number": 10552676, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19483 + } + }, + { + "ph": "s", "id": 243, "pid": 2338708, "tid": 2338708, "ts": 6345940285684.637, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285688.218, "dur": 3.425, + "args": { + "External id": 986584,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "4096"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345940285697.396, "dur": 37.257, + "args": { + "External id": 986585,"Sequence number": 10552677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345940285699.551, "dur": 34.876, + "args": { + "External id": 986586,"Sequence number": 10552677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940285702.816, "dur": 7.691, + "args": { + "External id": 986587,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940285705.595, "dur": 4.301, + "args": { + "External id": 986588,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285711.510, "dur": 22.444, + "args": { + "External id": 986589,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940285767.727, "dur": 4.613, + "args": { + "External id": 986590,"Sequence number": 10552677, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19490 + } + }, + { + "ph": "s", "id": 242, "pid": 2338708, "tid": 2338708, "ts": 6345940285767.727, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940285777.766, "dur": 4.059, + "args": { + "External id": 986591,"Sequence number": 10552678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940285821.113, "dur": 46741.630, + "args": { + "External id": 986592,"Sequence number": 10552678, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19492 + } + }, + { + "ph": "s", "id": 241, "pid": 2338708, "tid": 2338708, "ts": 6345940285821.113, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345940285839.853, "dur": 29.201, + "args": { + "External id": 986593,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345940285840.621, "dur": 28.171, + "args": { + "External id": 986594,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940285842.462, "dur": 5.330, + "args": { + "External id": 986595,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940285844.208, "dur": 3.043, + "args": { + "External id": 986596,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285848.831, "dur": 19.457, + "args": { + "External id": 986597,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940285888.477, "dur": 32.037, + "args": { + "External id": 986598,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940285889.739, "dur": 6.069, + "args": { + "External id": 986599,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285892.225, "dur": 3.201, + "args": { + "External id": 986600,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285897.500, "dur": 22.723, + "args": { + "External id": 986601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285902.210, "dur": 17.535, + "args": { + "External id": 986602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940285924.870, "dur": 22.391, + "args": { + "External id": 986603,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940285925.977, "dur": 5.498, + "args": { + "External id": 986604,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940285927.807, "dur": 3.391, + "args": { + "External id": 986605,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285932.059, "dur": 14.956, + "args": { + "External id": 986606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285932.800, "dur": 13.872, + "args": { + "External id": 986607,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345940285953.993, "dur": 24.200, + "args": { + "External id": 986608,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940285955.913, "dur": 5.580, + "args": { + "External id": 986609,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285962.283, "dur": 15.570, + "args": { + "External id": 986610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940285965.754, "dur": 11.782, + "args": { + "External id": 986611,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2338708, + "ts": 6345940285984.071, "dur": 51.582, + "args": { + "External id": 986612,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940286040.850, "dur": 125.134, + "args": { + "External id": 986613,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940286043.731, "dur": 121.536, + "args": { + "External id": 986614,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940286049.708, "dur": 1.297, + "args": { + "External id": 986615,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940286052.558, "dur": 93.198, + "args": { + "External id": 986616,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940286054.418, "dur": 91.028, + "args": { + "External id": 986617,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940286116.342, "dur": 6.620, + "args": { + "External id": 986618,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940286124.951, "dur": 20.019, + "args": { + "External id": 986619,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2338708, + "ts": 6345940286173.156, "dur": 39453.849, + "args": { + "External id": 986620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2338708, + "ts": 6345940286175.356, "dur": 39450.526, + "args": { + "External id": 986621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940325642.971, "dur": 10.278, + "args": { + "External id": 986622,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940325649.361, "dur": 1.688, + "args": { + "External id": 986623,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940325659.562, "dur": 149.607, + "args": { + "External id": 986624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940325679.655, "dur": 14.537, + "args": { + "External id": 986625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940325682.857, "dur": 10.120, + "args": { + "External id": 986626,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940325691.869, "dur": 0.765, + "args": { + "External id": 986627,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940325698.281, "dur": 110.041, + "args": { + "External id": 986628,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940325700.324, "dur": 107.119, + "args": { + "External id": 986629,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940325814.194, "dur": 5.711, + "args": { + "External id": 986630,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940325817.254, "dur": 0.782, + "args": { + "External id": 986631,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940325829.702, "dur": 2.743, + "args": { + "External id": 986632,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940325844.585, "dur": 11.097, + "args": { + "External id": 986633,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940325849.985, "dur": 5.358, + "args": { + "External id": 986634,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940326037.355, "dur": 281.101, + "args": { + "External id": 986635,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940326043.283, "dur": 4.128, + "args": { + "External id": 986636,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940326049.959, "dur": 267.746, + "args": { + "External id": 986637,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940326083.303, "dur": 0.848, + "args": { + "External id": 986638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940326086.781, "dur": 36.133, + "args": { + "External id": 986639,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940326125.119, "dur": 6.834, + "args": { + "External id": 986640,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940326130.710, "dur": 0.870, + "args": { + "External id": 986641,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940326133.358, "dur": 31.370, + "args": { + "External id": 986642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940326138.003, "dur": 1.558, + "args": { + "External id": 986643,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940326141.359, "dur": 23.023, + "args": { + "External id": 986644,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940326145.846, "dur": 4.085, + "args": { + "External id": 986645,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940326166.968, "dur": 31.512, + "args": { + "External id": 986646,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940326200.936, "dur": 16.487, + "args": { + "External id": 986647,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940326221.513, "dur": 17.810, + "args": { + "External id": 986648,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940326241.169, "dur": 14.955, + "args": { + "External id": 986649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940326258.619, "dur": 27.535, + "args": { + "External id": 986650,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940326261.632, "dur": 2.017, + "args": { + "External id": 986651,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940326269.135, "dur": 0.797, + "args": { + "External id": 986652,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940326287.975, "dur": 13.596, + "args": { + "External id": 986653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940326303.244, "dur": 12.786, + "args": { + "External id": 986654,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940326329.632, "dur": 3.119, + "args": { + "External id": 986655,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940326342.404, "dur": 4.677, + "args": { + "External id": 986656,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940326345.431, "dur": 0.563, + "args": { + "External id": 986657,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940326446.924, "dur": 85.957, + "args": { + "External id": 986658,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940326542.532, "dur": 10.033, + "args": { + "External id": 986659,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940326546.624, "dur": 1.140, + "args": { + "External id": 986660,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940326554.964, "dur": 31.760, + "args": { + "External id": 986661,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940326596.897, "dur": 9.639, + "args": { + "External id": 986662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940326599.213, "dur": 6.233, + "args": { + "External id": 986663,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940326601.696, "dur": 3.360, + "args": { + "External id": 986664,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940326612.910, "dur": 55.511, + "args": { + "External id": 986665,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940326614.778, "dur": 52.536, + "args": { + "External id": 986666,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940326674.309, "dur": 19.165, + "args": { + "External id": 986667,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940326701.647, "dur": 4.958, + "args": { + "External id": 986668,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940326704.613, "dur": 0.700, + "args": { + "External id": 986669,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940326712.383, "dur": 54.778, + "args": { + "External id": 986670,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940326713.547, "dur": 7.022, + "args": { + "External id": 986671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940326717.284, "dur": 2.520, + "args": { + "External id": 986672,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940326718.727, "dur": 0.828, + "args": { + "External id": 986673,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940326721.349, "dur": 45.307, + "args": { + "External id": 986674,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940326721.972, "dur": 44.036, + "args": { + "External id": 986675,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940326772.799, "dur": 4.782, + "args": { + "External id": 986676,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940326775.435, "dur": 0.524, + "args": { + "External id": 986677,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940326785.279, "dur": 2.061, + "args": { + "External id": 986678,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940326799.800, "dur": 7.906, + "args": { + "External id": 986679,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940326802.358, "dur": 4.907, + "args": { + "External id": 986680,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940326923.645, "dur": 286.722, + "args": { + "External id": 986681,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940326925.989, "dur": 2.486, + "args": { + "External id": 986682,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940326933.260, "dur": 276.187, + "args": { + "External id": 986683,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940326934.852, "dur": 0.731, + "args": { + "External id": 986684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940326939.661, "dur": 22.880, + "args": { + "External id": 986685,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940326964.521, "dur": 5.992, + "args": { + "External id": 986686,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940326966.985, "dur": 3.139, + "args": { + "External id": 986687,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940326971.812, "dur": 25.773, + "args": { + "External id": 986688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940326973.219, "dur": 4.240, + "args": { + "External id": 986689,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940326979.060, "dur": 18.180, + "args": { + "External id": 986690,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940326981.913, "dur": 2.511, + "args": { + "External id": 986691,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940326999.389, "dur": 46.460, + "args": { + "External id": 986692,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940327049.130, "dur": 57.700, + "args": { + "External id": 986693,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940327112.640, "dur": 16.193, + "args": { + "External id": 986694,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940327130.581, "dur": 15.294, + "args": { + "External id": 986695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940327151.326, "dur": 28.032, + "args": { + "External id": 986696,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940327154.217, "dur": 3.177, + "args": { + "External id": 986697,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940327160.214, "dur": 3.318, + "args": { + "External id": 986698,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940327181.398, "dur": 13.943, + "args": { + "External id": 986699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940327196.734, "dur": 11.215, + "args": { + "External id": 986700,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940327221.836, "dur": 3.280, + "args": { + "External id": 986701,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940327238.270, "dur": 4.679, + "args": { + "External id": 986702,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940327241.166, "dur": 0.680, + "args": { + "External id": 986703,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940327340.549, "dur": 72.688, + "args": { + "External id": 986704,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940327420.092, "dur": 6.541, + "args": { + "External id": 986705,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940327424.035, "dur": 1.126, + "args": { + "External id": 986706,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940327428.176, "dur": 31.365, + "args": { + "External id": 986707,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940327465.336, "dur": 9.495, + "args": { + "External id": 986708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940327467.147, "dur": 6.592, + "args": { + "External id": 986709,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940327471.808, "dur": 1.653, + "args": { + "External id": 986710,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940327478.344, "dur": 51.010, + "args": { + "External id": 986711,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940327479.426, "dur": 48.991, + "args": { + "External id": 986712,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940327534.341, "dur": 18.400, + "args": { + "External id": 986713,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940327560.287, "dur": 4.923, + "args": { + "External id": 986714,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940327563.201, "dur": 0.800, + "args": { + "External id": 986715,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940327570.414, "dur": 56.858, + "args": { + "External id": 986716,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940327574.709, "dur": 4.547, + "args": { + "External id": 986717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940327575.528, "dur": 3.013, + "args": { + "External id": 986718,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940327577.479, "dur": 0.885, + "args": { + "External id": 986719,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940327579.900, "dur": 46.831, + "args": { + "External id": 986720,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940327580.718, "dur": 45.116, + "args": { + "External id": 986721,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940327632.975, "dur": 4.620, + "args": { + "External id": 986722,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940327635.629, "dur": 0.581, + "args": { + "External id": 986723,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940327646.781, "dur": 1.692, + "args": { + "External id": 986724,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940327658.406, "dur": 10.514, + "args": { + "External id": 986725,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940327660.698, "dur": 7.903, + "args": { + "External id": 986726,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940327779.454, "dur": 326.449, + "args": { + "External id": 986727,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940327785.280, "dur": 2.173, + "args": { + "External id": 986728,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940327789.608, "dur": 315.412, + "args": { + "External id": 986729,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940327794.746, "dur": 0.576, + "args": { + "External id": 986730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940327796.627, "dur": 32.862, + "args": { + "External id": 986731,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940327831.875, "dur": 5.459, + "args": { + "External id": 986732,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940327836.519, "dur": 0.528, + "args": { + "External id": 986733,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940327838.577, "dur": 31.747, + "args": { + "External id": 986734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940327840.230, "dur": 1.272, + "args": { + "External id": 986735,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940327842.783, "dur": 27.222, + "args": { + "External id": 986736,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940327845.988, "dur": 3.160, + "args": { + "External id": 986737,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940327871.876, "dur": 39.313, + "args": { + "External id": 986738,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940327913.039, "dur": 20.063, + "args": { + "External id": 986739,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940327939.375, "dur": 20.000, + "args": { + "External id": 986740,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940327961.188, "dur": 16.388, + "args": { + "External id": 986741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940327979.895, "dur": 26.814, + "args": { + "External id": 986742,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940327984.855, "dur": 1.680, + "args": { + "External id": 986743,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940327989.309, "dur": 0.628, + "args": { + "External id": 986744,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940328026.068, "dur": 17.772, + "args": { + "External id": 986745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940328047.988, "dur": 54.293, + "args": { + "External id": 986746,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940328117.454, "dur": 3.360, + "args": { + "External id": 986747,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940328133.706, "dur": 5.650, + "args": { + "External id": 986748,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940328137.370, "dur": 0.793, + "args": { + "External id": 986749,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940328229.476, "dur": 72.925, + "args": { + "External id": 986750,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940328309.140, "dur": 5.937, + "args": { + "External id": 986751,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940328312.632, "dur": 0.870, + "args": { + "External id": 986752,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940328316.728, "dur": 33.669, + "args": { + "External id": 986753,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940328358.670, "dur": 6.770, + "args": { + "External id": 986754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940328360.590, "dur": 4.084, + "args": { + "External id": 986755,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940328363.374, "dur": 1.011, + "args": { + "External id": 986756,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940328368.728, "dur": 50.129, + "args": { + "External id": 986757,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940328369.742, "dur": 48.348, + "args": { + "External id": 986758,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940328424.220, "dur": 20.417, + "args": { + "External id": 986759,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940328451.844, "dur": 6.635, + "args": { + "External id": 986760,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940328456.900, "dur": 0.507, + "args": { + "External id": 986761,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940328463.346, "dur": 55.689, + "args": { + "External id": 986762,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940328464.561, "dur": 4.321, + "args": { + "External id": 986763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940328465.590, "dur": 2.627, + "args": { + "External id": 986764,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940328467.355, "dur": 0.682, + "args": { + "External id": 986765,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940328469.483, "dur": 49.168, + "args": { + "External id": 986766,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940328472.668, "dur": 45.411, + "args": { + "External id": 986767,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940328524.471, "dur": 4.057, + "args": { + "External id": 986768,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940328526.667, "dur": 0.599, + "args": { + "External id": 986769,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940328535.368, "dur": 1.744, + "args": { + "External id": 986770,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940328546.742, "dur": 9.943, + "args": { + "External id": 986771,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940328549.411, "dur": 6.908, + "args": { + "External id": 986772,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940328663.410, "dur": 227.546, + "args": { + "External id": 986773,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940328667.904, "dur": 1.727, + "args": { + "External id": 986774,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940328671.613, "dur": 218.470, + "args": { + "External id": 986775,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940328673.778, "dur": 0.594, + "args": { + "External id": 986776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940328675.991, "dur": 26.137, + "args": { + "External id": 986777,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940328704.323, "dur": 6.360, + "args": { + "External id": 986778,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940328709.491, "dur": 1.003, + "args": { + "External id": 986779,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940328711.756, "dur": 27.265, + "args": { + "External id": 986780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940328713.043, "dur": 1.115, + "args": { + "External id": 986781,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940328718.155, "dur": 20.564, + "args": { + "External id": 986782,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940328721.248, "dur": 2.430, + "args": { + "External id": 986783,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940328740.526, "dur": 26.640, + "args": { + "External id": 986784,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940328768.894, "dur": 17.711, + "args": { + "External id": 986785,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940328789.289, "dur": 15.936, + "args": { + "External id": 986786,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940328810.171, "dur": 16.401, + "args": { + "External id": 986787,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940328828.707, "dur": 25.595, + "args": { + "External id": 986788,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940328831.412, "dur": 1.924, + "args": { + "External id": 986789,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940328835.915, "dur": 0.555, + "args": { + "External id": 986790,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940328858.672, "dur": 16.159, + "args": { + "External id": 986791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940328876.167, "dur": 12.480, + "args": { + "External id": 986792,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940328898.958, "dur": 2.065, + "args": { + "External id": 986793,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940328911.098, "dur": 3.724, + "args": { + "External id": 986794,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940328913.295, "dur": 0.551, + "args": { + "External id": 986795,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940328993.134, "dur": 121.563, + "args": { + "External id": 986796,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940329123.701, "dur": 7.851, + "args": { + "External id": 986797,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940329128.496, "dur": 1.180, + "args": { + "External id": 986798,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940329135.825, "dur": 30.261, + "args": { + "External id": 986799,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940329172.814, "dur": 9.106, + "args": { + "External id": 986800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940329174.929, "dur": 6.097, + "args": { + "External id": 986801,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940329177.086, "dur": 3.626, + "args": { + "External id": 986802,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940329185.582, "dur": 55.059, + "args": { + "External id": 986803,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940329187.030, "dur": 52.968, + "args": { + "External id": 986804,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940329246.288, "dur": 19.589, + "args": { + "External id": 986805,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940329276.431, "dur": 5.133, + "args": { + "External id": 986806,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940329279.773, "dur": 0.607, + "args": { + "External id": 986807,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940329286.313, "dur": 53.241, + "args": { + "External id": 986808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940329287.190, "dur": 3.836, + "args": { + "External id": 986809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940329288.243, "dur": 2.088, + "args": { + "External id": 986810,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940329289.537, "dur": 0.615, + "args": { + "External id": 986811,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940329294.519, "dur": 44.381, + "args": { + "External id": 986812,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940329295.266, "dur": 43.030, + "args": { + "External id": 986813,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940329344.647, "dur": 4.811, + "args": { + "External id": 986814,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940329347.376, "dur": 0.544, + "args": { + "External id": 986815,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940329356.740, "dur": 1.806, + "args": { + "External id": 986816,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940329368.226, "dur": 13.267, + "args": { + "External id": 986817,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940329373.127, "dur": 8.017, + "args": { + "External id": 986818,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940329493.577, "dur": 218.072, + "args": { + "External id": 986819,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940329496.168, "dur": 2.207, + "args": { + "External id": 986820,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940329502.620, "dur": 208.375, + "args": { + "External id": 986821,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940329504.008, "dur": 0.587, + "args": { + "External id": 986822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940329505.972, "dur": 26.074, + "args": { + "External id": 986823,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940329533.713, "dur": 5.926, + "args": { + "External id": 986824,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940329536.087, "dur": 3.088, + "args": { + "External id": 986825,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940329541.102, "dur": 29.152, + "args": { + "External id": 986826,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940329545.364, "dur": 1.603, + "args": { + "External id": 986827,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940329548.333, "dur": 21.528, + "args": { + "External id": 986828,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940329551.475, "dur": 3.545, + "args": { + "External id": 986829,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940329571.932, "dur": 24.435, + "args": { + "External id": 986830,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940329598.361, "dur": 16.688, + "args": { + "External id": 986831,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940329618.528, "dur": 16.304, + "args": { + "External id": 986832,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940329636.587, "dur": 15.949, + "args": { + "External id": 986833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940329654.342, "dur": 26.988, + "args": { + "External id": 986834,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940329656.654, "dur": 2.030, + "args": { + "External id": 986835,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940329663.741, "dur": 2.840, + "args": { + "External id": 986836,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940329682.859, "dur": 13.249, + "args": { + "External id": 986837,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940329697.534, "dur": 12.174, + "args": { + "External id": 986838,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940329719.980, "dur": 2.340, + "args": { + "External id": 986839,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940329733.661, "dur": 4.081, + "args": { + "External id": 986840,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940329736.070, "dur": 0.616, + "args": { + "External id": 986841,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940329817.228, "dur": 62.018, + "args": { + "External id": 986842,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940329887.582, "dur": 5.523, + "args": { + "External id": 986843,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940329890.762, "dur": 1.087, + "args": { + "External id": 986844,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940329894.508, "dur": 26.647, + "args": { + "External id": 986845,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940329926.520, "dur": 6.620, + "args": { + "External id": 986846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940329928.190, "dur": 3.994, + "args": { + "External id": 986847,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940329930.395, "dur": 1.540, + "args": { + "External id": 986848,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940329939.462, "dur": 46.933, + "args": { + "External id": 986849,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940329940.701, "dur": 45.067, + "args": { + "External id": 986850,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940329991.367, "dur": 35.920, + "args": { + "External id": 986851,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940330037.887, "dur": 6.737, + "args": { + "External id": 986852,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940330042.015, "dur": 0.997, + "args": { + "External id": 986853,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940330050.076, "dur": 106.645, + "args": { + "External id": 986854,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940330050.982, "dur": 49.828, + "args": { + "External id": 986855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940330055.038, "dur": 44.708, + "args": { + "External id": 986856,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940330095.248, "dur": 3.901, + "args": { + "External id": 986857,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940330102.016, "dur": 54.157, + "args": { + "External id": 986858,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940330103.187, "dur": 52.193, + "args": { + "External id": 986859,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940330163.377, "dur": 5.148, + "args": { + "External id": 986860,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940330166.396, "dur": 0.636, + "args": { + "External id": 986861,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940330176.455, "dur": 2.021, + "args": { + "External id": 986862,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940330190.612, "dur": 7.790, + "args": { + "External id": 986863,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940330193.110, "dur": 4.949, + "args": { + "External id": 986864,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940330309.399, "dur": 211.840, + "args": { + "External id": 986865,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940330314.390, "dur": 1.940, + "args": { + "External id": 986866,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940330318.236, "dur": 202.334, + "args": { + "External id": 986867,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940330319.747, "dur": 0.403, + "args": { + "External id": 986868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940330324.536, "dur": 25.456, + "args": { + "External id": 986869,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940330351.809, "dur": 5.054, + "args": { + "External id": 986870,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940330355.957, "dur": 0.670, + "args": { + "External id": 986871,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940330357.910, "dur": 24.149, + "args": { + "External id": 986872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940330359.383, "dur": 1.739, + "args": { + "External id": 986873,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940330362.829, "dur": 18.935, + "args": { + "External id": 986874,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940330366.149, "dur": 2.502, + "args": { + "External id": 986875,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940330383.768, "dur": 26.161, + "args": { + "External id": 986876,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940330412.294, "dur": 18.998, + "args": { + "External id": 986877,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940330434.720, "dur": 15.089, + "args": { + "External id": 986878,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940330451.303, "dur": 13.772, + "args": { + "External id": 986879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940330470.366, "dur": 22.074, + "args": { + "External id": 986880,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940330472.990, "dur": 2.144, + "args": { + "External id": 986881,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940330477.239, "dur": 0.665, + "args": { + "External id": 986882,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940330493.987, "dur": 12.890, + "args": { + "External id": 986883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940330508.058, "dur": 11.240, + "args": { + "External id": 986884,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940330529.358, "dur": 1.992, + "args": { + "External id": 986885,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940330542.067, "dur": 5.490, + "args": { + "External id": 986886,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940330545.909, "dur": 0.459, + "args": { + "External id": 986887,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940330628.004, "dur": 60.069, + "args": { + "External id": 986888,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940330693.820, "dur": 5.707, + "args": { + "External id": 986889,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940330697.071, "dur": 0.858, + "args": { + "External id": 986890,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940330700.886, "dur": 26.406, + "args": { + "External id": 986891,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940330732.693, "dur": 8.576, + "args": { + "External id": 986892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940330734.324, "dur": 6.073, + "args": { + "External id": 986893,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940330738.841, "dur": 1.332, + "args": { + "External id": 986894,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940330744.307, "dur": 46.737, + "args": { + "External id": 986895,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940330745.614, "dur": 44.710, + "args": { + "External id": 986896,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940330795.520, "dur": 16.718, + "args": { + "External id": 986897,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940330819.484, "dur": 4.813, + "args": { + "External id": 986898,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940330822.657, "dur": 0.581, + "args": { + "External id": 986899,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940330828.788, "dur": 52.650, + "args": { + "External id": 986900,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940330832.588, "dur": 3.741, + "args": { + "External id": 986901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940330833.308, "dur": 2.383, + "args": { + "External id": 986902,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940330834.934, "dur": 0.608, + "args": { + "External id": 986903,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940330836.963, "dur": 44.124, + "args": { + "External id": 986904,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940330837.783, "dur": 42.688, + "args": { + "External id": 986905,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940330886.923, "dur": 7.263, + "args": { + "External id": 986906,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940330892.065, "dur": 0.733, + "args": { + "External id": 986907,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940330903.245, "dur": 1.451, + "args": { + "External id": 986908,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940330912.928, "dur": 10.975, + "args": { + "External id": 986909,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940330915.120, "dur": 8.416, + "args": { + "External id": 986910,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940331042.169, "dur": 266.657, + "args": { + "External id": 986911,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940331044.764, "dur": 3.807, + "args": { + "External id": 986912,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940331051.055, "dur": 257.134, + "args": { + "External id": 986913,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940331091.206, "dur": 0.862, + "args": { + "External id": 986914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940331094.975, "dur": 34.253, + "args": { + "External id": 986915,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940331131.176, "dur": 3.687, + "args": { + "External id": 986916,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940331133.715, "dur": 0.866, + "args": { + "External id": 986917,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940331135.972, "dur": 26.822, + "args": { + "External id": 986918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940331137.680, "dur": 1.794, + "args": { + "External id": 986919,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940331141.026, "dur": 21.372, + "args": { + "External id": 986920,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940331146.452, "dur": 3.383, + "args": { + "External id": 986921,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940331164.132, "dur": 23.640, + "args": { + "External id": 986922,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940331189.552, "dur": 16.733, + "args": { + "External id": 986923,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940331212.435, "dur": 16.901, + "args": { + "External id": 986924,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940331230.929, "dur": 16.187, + "args": { + "External id": 986925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940331249.214, "dur": 23.304, + "args": { + "External id": 986926,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940331251.520, "dur": 1.786, + "args": { + "External id": 986927,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940331255.437, "dur": 0.776, + "args": { + "External id": 986928,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940331274.191, "dur": 15.096, + "args": { + "External id": 986929,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940331292.603, "dur": 14.359, + "args": { + "External id": 986930,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940331318.663, "dur": 2.643, + "args": { + "External id": 986931,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940331333.005, "dur": 4.309, + "args": { + "External id": 986932,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940331335.791, "dur": 0.464, + "args": { + "External id": 986933,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940331422.529, "dur": 70.062, + "args": { + "External id": 986934,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940331498.809, "dur": 5.445, + "args": { + "External id": 986935,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940331502.256, "dur": 0.861, + "args": { + "External id": 986936,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940331505.849, "dur": 31.009, + "args": { + "External id": 986937,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940331544.405, "dur": 6.213, + "args": { + "External id": 986938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940331546.212, "dur": 3.392, + "args": { + "External id": 986939,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940331548.484, "dur": 0.905, + "args": { + "External id": 986940,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940331553.566, "dur": 52.663, + "args": { + "External id": 986941,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940331554.600, "dur": 50.764, + "args": { + "External id": 986942,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940331611.109, "dur": 19.015, + "args": { + "External id": 986943,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940331636.703, "dur": 6.787, + "args": { + "External id": 986944,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940331641.696, "dur": 0.660, + "args": { + "External id": 986945,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940331648.073, "dur": 55.871, + "args": { + "External id": 986946,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940331649.053, "dur": 6.007, + "args": { + "External id": 986947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940331649.765, "dur": 4.519, + "args": { + "External id": 986948,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940331651.264, "dur": 2.790, + "args": { + "External id": 986949,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940331655.763, "dur": 47.817, + "args": { + "External id": 986950,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940331659.128, "dur": 43.723, + "args": { + "External id": 986951,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940331708.602, "dur": 5.230, + "args": { + "External id": 986952,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940331711.379, "dur": 0.870, + "args": { + "External id": 986953,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940331720.347, "dur": 1.573, + "args": { + "External id": 986954,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940331730.928, "dur": 8.179, + "args": { + "External id": 986955,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940331732.978, "dur": 5.790, + "args": { + "External id": 986956,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940331840.113, "dur": 271.935, + "args": { + "External id": 986957,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940331842.694, "dur": 2.319, + "args": { + "External id": 986958,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940331846.861, "dur": 264.555, + "args": { + "External id": 986959,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940331848.553, "dur": 0.409, + "args": { + "External id": 986960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940331852.709, "dur": 23.230, + "args": { + "External id": 986961,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940331877.768, "dur": 3.093, + "args": { + "External id": 986962,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940331879.787, "dur": 0.827, + "args": { + "External id": 986963,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940331882.046, "dur": 29.437, + "args": { + "External id": 986964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940331883.286, "dur": 3.681, + "args": { + "External id": 986965,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940331891.047, "dur": 19.944, + "args": { + "External id": 986966,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940331893.423, "dur": 2.538, + "args": { + "External id": 986967,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940331913.166, "dur": 21.315, + "args": { + "External id": 986968,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940331936.267, "dur": 16.358, + "args": { + "External id": 986969,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940331955.069, "dur": 16.012, + "args": { + "External id": 986970,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940331972.363, "dur": 14.838, + "args": { + "External id": 986971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940331989.044, "dur": 43.298, + "args": { + "External id": 986972,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940331991.332, "dur": 1.876, + "args": { + "External id": 986973,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940331995.324, "dur": 0.642, + "args": { + "External id": 986974,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332037.667, "dur": 16.215, + "args": { + "External id": 986975,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332054.988, "dur": 53.627, + "args": { + "External id": 986976,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940332123.264, "dur": 2.879, + "args": { + "External id": 986977,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940332137.521, "dur": 4.787, + "args": { + "External id": 986978,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332140.736, "dur": 0.606, + "args": { + "External id": 986979,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940332224.758, "dur": 67.185, + "args": { + "External id": 986980,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940332297.831, "dur": 5.832, + "args": { + "External id": 986981,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332301.471, "dur": 0.908, + "args": { + "External id": 986982,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332307.969, "dur": 28.907, + "args": { + "External id": 986983,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940332342.704, "dur": 8.839, + "args": { + "External id": 986984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940332344.415, "dur": 6.080, + "args": { + "External id": 986985,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332346.811, "dur": 3.408, + "args": { + "External id": 986986,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940332354.738, "dur": 51.245, + "args": { + "External id": 986987,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940332356.014, "dur": 49.186, + "args": { + "External id": 986988,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332410.677, "dur": 19.152, + "args": { + "External id": 986989,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940332438.508, "dur": 28.941, + "args": { + "External id": 986990,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940332441.536, "dur": 25.459, + "args": { + "External id": 986991,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332447.663, "dur": 0.683, + "args": { + "External id": 986992,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940332473.503, "dur": 32.751, + "args": { + "External id": 986993,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940332475.632, "dur": 30.338, + "args": { + "External id": 986994,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 19894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332481.256, "dur": 4.527, + "args": { + "External id": 986995,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332487.149, "dur": 18.182, + "args": { + "External id": 986996,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940332522.206, "dur": 8.333, + "args": { + "External id": 986997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940332526.991, "dur": 3.083, + "args": { + "External id": 986998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940332531.763, "dur": 1.313, + "args": { + "External id": 986999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940332532.237, "dur": 0.732, + "args": { + "External id": 987000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332584.290, "dur": 27.133, + "args": { + "External id": 987001,"Sequence number": 10552679, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332614.023, "dur": 16.055, + "args": { + "External id": 987002,"Sequence number": 10552680, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 19902 + } + }, + { + "ph": "s", "id": 240, "pid": 2338708, "tid": 2338708, "ts": 6345940332614.023, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940332636.857, "dur": 7.234, + "args": { + "External id": 987003,"Sequence number": 10552681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 19903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332641.243, "dur": 1.252, + "args": { + "External id": 987004,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345940332647.356, "dur": 9.419, + "args": { + "External id": 987005,"Sequence number": 10552681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "2"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 19905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332654.716, "dur": 0.646, + "args": { + "External id": 987006,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 19906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940332658.391, "dur": 3.197, + "args": { + "External id": 987007,"Sequence number": 10552681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 19907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332660.364, "dur": 0.479, + "args": { + "External id": 987008,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "3"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 19908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940332666.261, "dur": 6.147, + "args": { + "External id": 987009,"Sequence number": 10552681, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19909 + } + }, + { + "ph": "s", "id": 239, "pid": 2338708, "tid": 2338708, "ts": 6345940332666.261, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332669.782, "dur": 0.881, + "args": { + "External id": 987010,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940332676.257, "dur": 5.335, + "args": { + "External id": 987011,"Sequence number": 10552682, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 19911 + } + }, + { + "ph": "s", "id": 238, "pid": 2338708, "tid": 2338708, "ts": 6345940332676.257, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332680.331, "dur": 0.366, + "args": { + "External id": 987012,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345940332682.811, "dur": 5.537, + "args": { + "External id": 987013,"Sequence number": 10552683, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "2"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 19913 + } + }, + { + "ph": "s", "id": 237, "pid": 2338708, "tid": 2338708, "ts": 6345940332682.811, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332686.611, "dur": 0.612, + "args": { + "External id": 987014,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 19914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940332689.570, "dur": 7.313, + "args": { + "External id": 987015,"Sequence number": 10552684, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 19915 + } + }, + { + "ph": "s", "id": 236, "pid": 2338708, "tid": 2338708, "ts": 6345940332689.570, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332692.544, "dur": 3.426, + "args": { + "External id": 987016,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "8192"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 19916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345940332701.337, "dur": 35.393, + "args": { + "External id": 987017,"Sequence number": 10552685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345940332705.510, "dur": 30.901, + "args": { + "External id": 987018,"Sequence number": 10552685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940332708.575, "dur": 8.181, + "args": { + "External id": 987019,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 19919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940332711.563, "dur": 4.474, + "args": { + "External id": 987020,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332717.831, "dur": 18.061, + "args": { + "External id": 987021,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 19921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940332768.177, "dur": 4.473, + "args": { + "External id": 987022,"Sequence number": 10552685, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 19922 + } + }, + { + "ph": "s", "id": 235, "pid": 2338708, "tid": 2338708, "ts": 6345940332768.177, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940332775.611, "dur": 1.232, + "args": { + "External id": 987023,"Sequence number": 10552686, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 19923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940332820.502, "dur": 46633.844, + "args": { + "External id": 987024,"Sequence number": 10552686, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 19924 + } + }, + { + "ph": "s", "id": 234, "pid": 2338708, "tid": 2338708, "ts": 6345940332820.502, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345940332838.817, "dur": 34.427, + "args": { + "External id": 987025,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345940332844.744, "dur": 28.052, + "args": { + "External id": 987026,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940332846.506, "dur": 5.521, + "args": { + "External id": 987027,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940332848.233, "dur": 3.306, + "args": { + "External id": 987028,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332853.096, "dur": 19.025, + "args": { + "External id": 987029,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 19929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940332893.033, "dur": 31.654, + "args": { + "External id": 987030,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940332894.577, "dur": 6.552, + "args": { + "External id": 987031,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 19931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332896.644, "dur": 4.138, + "args": { + "External id": 987032,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332902.830, "dur": 21.574, + "args": { + "External id": 987033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 19933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332907.592, "dur": 16.369, + "args": { + "External id": 987034,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 19934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940332929.229, "dur": 24.182, + "args": { + "External id": 987035,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940332929.993, "dur": 4.920, + "args": { + "External id": 987036,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 19936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940332931.801, "dur": 2.811, + "args": { + "External id": 987037,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332935.477, "dur": 17.648, + "args": { + "External id": 987038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332936.077, "dur": 16.607, + "args": { + "External id": 987039,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 19939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345940332960.325, "dur": 21.316, + "args": { + "External id": 987040,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 19940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940332962.473, "dur": 3.602, + "args": { + "External id": 987041,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332966.731, "dur": 14.553, + "args": { + "External id": 987042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 19942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940332969.735, "dur": 11.184, + "args": { + "External id": 987043,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2338708, + "ts": 6345940332986.915, "dur": 51.992, + "args": { + "External id": 987044,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940333044.074, "dur": 104.065, + "args": { + "External id": 987045,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 19945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940333046.528, "dur": 100.890, + "args": { + "External id": 987046,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940333052.991, "dur": 1.422, + "args": { + "External id": 987047,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 19947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940333095.311, "dur": 33.850, + "args": { + "External id": 987048,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940333100.231, "dur": 28.726, + "args": { + "External id": 987049,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 19949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940333103.337, "dur": 4.980, + "args": { + "External id": 987050,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940333109.721, "dur": 18.702, + "args": { + "External id": 987051,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 19951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2338708, + "ts": 6345940333156.341, "dur": 39345.444, + "args": { + "External id": 987052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2338708, + "ts": 6345940333158.198, "dur": 39342.607, + "args": { + "External id": 987053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940372515.523, "dur": 7.805, + "args": { + "External id": 987054,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940372519.964, "dur": 1.155, + "args": { + "External id": 987055,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940372529.588, "dur": 122.902, + "args": { + "External id": 987056,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 19956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940372531.425, "dur": 7.693, + "args": { + "External id": 987057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 19957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940372534.287, "dur": 3.702, + "args": { + "External id": 987058,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 19958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940372536.917, "dur": 0.733, + "args": { + "External id": 987059,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 19959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940372543.069, "dur": 108.624, + "args": { + "External id": 987060,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940372544.839, "dur": 105.533, + "args": { + "External id": 987061,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 19961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940372657.669, "dur": 4.918, + "args": { + "External id": 987062,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940372660.308, "dur": 0.771, + "args": { + "External id": 987063,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940372671.806, "dur": 2.744, + "args": { + "External id": 987064,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 19964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940372685.519, "dur": 10.413, + "args": { + "External id": 987065,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 19965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940372690.856, "dur": 4.685, + "args": { + "External id": 987066,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940372858.866, "dur": 307.320, + "args": { + "External id": 987067,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940372862.616, "dur": 4.837, + "args": { + "External id": 987068,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940372869.214, "dur": 296.033, + "args": { + "External id": 987069,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 19969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940372871.079, "dur": 0.570, + "args": { + "External id": 987070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940372873.640, "dur": 30.899, + "args": { + "External id": 987071,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 19971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940372906.922, "dur": 6.738, + "args": { + "External id": 987072,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 19972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940372912.510, "dur": 0.766, + "args": { + "External id": 987073,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 19973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940372915.053, "dur": 36.190, + "args": { + "External id": 987074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940372924.134, "dur": 1.453, + "args": { + "External id": 987075,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 19975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940372927.216, "dur": 23.677, + "args": { + "External id": 987076,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 19976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940372932.093, "dur": 3.674, + "args": { + "External id": 987077,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940372953.353, "dur": 25.567, + "args": { + "External id": 987078,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940372981.193, "dur": 20.788, + "args": { + "External id": 987079,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940373005.693, "dur": 34.718, + "args": { + "External id": 987080,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 19980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940373043.224, "dur": 50.888, + "args": { + "External id": 987081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 19981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940373098.280, "dur": 31.854, + "args": { + "External id": 987082,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 19982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940373101.560, "dur": 2.802, + "args": { + "External id": 987083,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 19983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940373109.400, "dur": 1.040, + "args": { + "External id": 987084,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 19984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940373132.166, "dur": 15.808, + "args": { + "External id": 987085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 19985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940373149.554, "dur": 14.377, + "args": { + "External id": 987086,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 19986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940373178.007, "dur": 3.119, + "args": { + "External id": 987087,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 19987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940373191.260, "dur": 4.847, + "args": { + "External id": 987088,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 19988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940373194.399, "dur": 0.468, + "args": { + "External id": 987089,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 19989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940373299.266, "dur": 87.823, + "args": { + "External id": 987090,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 19990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940373396.200, "dur": 8.999, + "args": { + "External id": 987091,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 19991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940373399.699, "dur": 0.998, + "args": { + "External id": 987092,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 19992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940373407.341, "dur": 37.667, + "args": { + "External id": 987093,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 19993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940373452.099, "dur": 7.669, + "args": { + "External id": 987094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 19994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940373454.102, "dur": 4.631, + "args": { + "External id": 987095,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 19995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940373456.755, "dur": 1.647, + "args": { + "External id": 987096,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 19996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940373466.323, "dur": 57.612, + "args": { + "External id": 987097,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940373467.850, "dur": 55.151, + "args": { + "External id": 987098,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 19998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940373528.768, "dur": 19.607, + "args": { + "External id": 987099,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 19999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940373556.234, "dur": 4.895, + "args": { + "External id": 987100,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940373559.191, "dur": 0.829, + "args": { + "External id": 987101,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940373567.144, "dur": 56.542, + "args": { + "External id": 987102,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940373568.077, "dur": 7.085, + "args": { + "External id": 987103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940373571.486, "dur": 2.928, + "args": { + "External id": 987104,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940373573.380, "dur": 0.864, + "args": { + "External id": 987105,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940373575.894, "dur": 47.326, + "args": { + "External id": 987106,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940373576.638, "dur": 45.852, + "args": { + "External id": 987107,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940373628.697, "dur": 4.151, + "args": { + "External id": 987108,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940373631.020, "dur": 0.505, + "args": { + "External id": 987109,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940373642.323, "dur": 1.981, + "args": { + "External id": 987110,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940373654.793, "dur": 10.068, + "args": { + "External id": 987111,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940373657.182, "dur": 7.327, + "args": { + "External id": 987112,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940373781.833, "dur": 251.994, + "args": { + "External id": 987113,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940373784.823, "dur": 2.254, + "args": { + "External id": 987114,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940373788.751, "dur": 244.306, + "args": { + "External id": 987115,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940373793.793, "dur": 0.382, + "args": { + "External id": 987116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940373796.290, "dur": 29.362, + "args": { + "External id": 987117,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940373827.806, "dur": 6.559, + "args": { + "External id": 987118,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940373830.686, "dur": 3.392, + "args": { + "External id": 987119,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940373835.548, "dur": 27.669, + "args": { + "External id": 987120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940373836.783, "dur": 1.759, + "args": { + "External id": 987121,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940373840.309, "dur": 22.488, + "args": { + "External id": 987122,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940373845.966, "dur": 2.538, + "args": { + "External id": 987123,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940373865.169, "dur": 27.062, + "args": { + "External id": 987124,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940373894.063, "dur": 15.835, + "args": { + "External id": 987125,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940373915.763, "dur": 17.023, + "args": { + "External id": 987126,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940373934.592, "dur": 15.572, + "args": { + "External id": 987127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940373952.683, "dur": 25.514, + "args": { + "External id": 987128,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940373955.277, "dur": 2.593, + "args": { + "External id": 987129,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940373960.366, "dur": 0.840, + "args": { + "External id": 987130,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940373979.866, "dur": 15.526, + "args": { + "External id": 987131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940373996.679, "dur": 34.111, + "args": { + "External id": 987132,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940374047.665, "dur": 3.129, + "args": { + "External id": 987133,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940374103.589, "dur": 5.921, + "args": { + "External id": 987134,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940374107.057, "dur": 0.951, + "args": { + "External id": 987135,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940374204.254, "dur": 75.068, + "args": { + "External id": 987136,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940374309.695, "dur": 6.879, + "args": { + "External id": 987137,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940374313.862, "dur": 1.020, + "args": { + "External id": 987138,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940374318.351, "dur": 33.318, + "args": { + "External id": 987139,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940374360.586, "dur": 11.045, + "args": { + "External id": 987140,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940374362.442, "dur": 8.111, + "args": { + "External id": 987141,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940374366.844, "dur": 3.347, + "args": { + "External id": 987142,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940374375.454, "dur": 53.210, + "args": { + "External id": 987143,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940374376.643, "dur": 51.170, + "args": { + "External id": 987144,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940374433.534, "dur": 20.339, + "args": { + "External id": 987145,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940374461.044, "dur": 7.858, + "args": { + "External id": 987146,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940374466.773, "dur": 0.805, + "args": { + "External id": 987147,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940374473.848, "dur": 58.656, + "args": { + "External id": 987148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940374474.745, "dur": 4.018, + "args": { + "External id": 987149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940374475.693, "dur": 2.240, + "args": { + "External id": 987150,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940374476.994, "dur": 0.775, + "args": { + "External id": 987151,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940374479.458, "dur": 52.670, + "args": { + "External id": 987152,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940374482.689, "dur": 48.622, + "args": { + "External id": 987153,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940374538.498, "dur": 7.311, + "args": { + "External id": 987154,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940374541.208, "dur": 3.047, + "args": { + "External id": 987155,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940374553.684, "dur": 1.940, + "args": { + "External id": 987156,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940374565.616, "dur": 8.559, + "args": { + "External id": 987157,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940374568.318, "dur": 5.450, + "args": { + "External id": 987158,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940374691.233, "dur": 229.376, + "args": { + "External id": 987159,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940374693.760, "dur": 2.410, + "args": { + "External id": 987160,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940374700.812, "dur": 218.951, + "args": { + "External id": 987161,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940374702.486, "dur": 0.400, + "args": { + "External id": 987162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940374704.220, "dur": 27.284, + "args": { + "External id": 987163,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940374733.937, "dur": 5.682, + "args": { + "External id": 987164,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940374736.220, "dur": 3.082, + "args": { + "External id": 987165,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940374740.822, "dur": 33.111, + "args": { + "External id": 987166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940374742.371, "dur": 1.594, + "args": { + "External id": 987167,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940374745.777, "dur": 27.774, + "args": { + "External id": 987168,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940374751.591, "dur": 3.661, + "args": { + "External id": 987169,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940374775.399, "dur": 29.314, + "args": { + "External id": 987170,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940374806.859, "dur": 16.067, + "args": { + "External id": 987171,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940374827.161, "dur": 15.484, + "args": { + "External id": 987172,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940374844.426, "dur": 14.637, + "args": { + "External id": 987173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940374861.466, "dur": 26.582, + "args": { + "External id": 987174,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940374864.221, "dur": 2.011, + "args": { + "External id": 987175,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940374869.168, "dur": 3.131, + "args": { + "External id": 987176,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940374892.786, "dur": 13.394, + "args": { + "External id": 987177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940374907.576, "dur": 11.001, + "args": { + "External id": 987178,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940374929.680, "dur": 2.216, + "args": { + "External id": 987179,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940374943.285, "dur": 4.195, + "args": { + "External id": 987180,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940374945.926, "dur": 0.535, + "args": { + "External id": 987181,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940375047.700, "dur": 114.101, + "args": { + "External id": 987182,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940375170.826, "dur": 10.842, + "args": { + "External id": 987183,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940375178.287, "dur": 1.494, + "args": { + "External id": 987184,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940375183.351, "dur": 33.149, + "args": { + "External id": 987185,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940375222.708, "dur": 7.836, + "args": { + "External id": 987186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940375224.730, "dur": 4.882, + "args": { + "External id": 987187,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940375227.466, "dur": 1.861, + "args": { + "External id": 987188,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940375234.187, "dur": 49.138, + "args": { + "External id": 987189,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940375235.662, "dur": 46.867, + "args": { + "External id": 987190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940375291.029, "dur": 17.205, + "args": { + "External id": 987191,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940375315.841, "dur": 5.453, + "args": { + "External id": 987192,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940375319.463, "dur": 0.665, + "args": { + "External id": 987193,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940375326.130, "dur": 54.884, + "args": { + "External id": 987194,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940375327.367, "dur": 7.133, + "args": { + "External id": 987195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940375328.521, "dur": 5.229, + "args": { + "External id": 987196,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940375332.781, "dur": 0.813, + "args": { + "External id": 987197,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940375335.315, "dur": 45.331, + "args": { + "External id": 987198,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940375336.255, "dur": 43.644, + "args": { + "External id": 987199,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940375385.668, "dur": 4.715, + "args": { + "External id": 987200,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940375388.393, "dur": 0.500, + "args": { + "External id": 987201,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940375398.056, "dur": 2.167, + "args": { + "External id": 987202,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940375412.775, "dur": 8.399, + "args": { + "External id": 987203,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940375415.410, "dur": 5.426, + "args": { + "External id": 987204,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940375533.413, "dur": 219.741, + "args": { + "External id": 987205,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940375538.596, "dur": 4.718, + "args": { + "External id": 987206,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940375545.537, "dur": 206.921, + "args": { + "External id": 987207,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940375547.417, "dur": 0.568, + "args": { + "External id": 987208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940375549.565, "dur": 24.431, + "args": { + "External id": 987209,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940375578.382, "dur": 5.759, + "args": { + "External id": 987210,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940375582.863, "dur": 0.937, + "args": { + "External id": 987211,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940375585.181, "dur": 25.454, + "args": { + "External id": 987212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940375586.758, "dur": 1.575, + "args": { + "External id": 987213,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940375589.918, "dur": 20.354, + "args": { + "External id": 987214,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940375593.136, "dur": 3.422, + "args": { + "External id": 987215,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940375612.136, "dur": 23.687, + "args": { + "External id": 987216,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940375637.976, "dur": 18.271, + "args": { + "External id": 987217,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940375659.694, "dur": 15.239, + "args": { + "External id": 987218,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940375676.907, "dur": 14.955, + "args": { + "External id": 987219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940375694.128, "dur": 27.374, + "args": { + "External id": 987220,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940375699.680, "dur": 2.399, + "args": { + "External id": 987221,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940375704.622, "dur": 0.844, + "args": { + "External id": 987222,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940375723.268, "dur": 13.679, + "args": { + "External id": 987223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940375738.221, "dur": 12.681, + "args": { + "External id": 987224,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940375761.456, "dur": 2.150, + "args": { + "External id": 987225,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940375774.287, "dur": 4.150, + "args": { + "External id": 987226,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940375776.773, "dur": 0.607, + "args": { + "External id": 987227,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940375856.814, "dur": 64.094, + "args": { + "External id": 987228,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940375928.953, "dur": 5.920, + "args": { + "External id": 987229,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940375932.416, "dur": 1.058, + "args": { + "External id": 987230,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940375936.495, "dur": 26.617, + "args": { + "External id": 987231,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940375968.734, "dur": 6.203, + "args": { + "External id": 987232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940375970.550, "dur": 3.601, + "args": { + "External id": 987233,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940375972.559, "dur": 1.324, + "args": { + "External id": 987234,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940375980.874, "dur": 112.220, + "args": { + "External id": 987235,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940375982.354, "dur": 109.308, + "args": { + "External id": 987236,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940376100.766, "dur": 21.000, + "args": { + "External id": 987237,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940376130.314, "dur": 6.068, + "args": { + "External id": 987238,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940376133.914, "dur": 1.029, + "args": { + "External id": 987239,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940376141.723, "dur": 63.754, + "args": { + "External id": 987240,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940376143.159, "dur": 7.329, + "args": { + "External id": 987241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940376147.119, "dur": 2.595, + "args": { + "External id": 987242,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940376148.992, "dur": 0.551, + "args": { + "External id": 987243,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940376151.478, "dur": 53.512, + "args": { + "External id": 987244,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940376152.582, "dur": 51.866, + "args": { + "External id": 987245,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940376211.947, "dur": 5.319, + "args": { + "External id": 987246,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940376214.935, "dur": 0.789, + "args": { + "External id": 987247,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940376225.723, "dur": 2.208, + "args": { + "External id": 987248,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940376240.959, "dur": 9.611, + "args": { + "External id": 987249,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940376243.413, "dur": 6.820, + "args": { + "External id": 987250,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940376364.226, "dur": 235.501, + "args": { + "External id": 987251,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940376367.067, "dur": 2.597, + "args": { + "External id": 987252,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940376371.710, "dur": 227.210, + "args": { + "External id": 987253,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940376374.004, "dur": 0.633, + "args": { + "External id": 987254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940376376.172, "dur": 28.242, + "args": { + "External id": 987255,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940376408.722, "dur": 3.882, + "args": { + "External id": 987256,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940376411.634, "dur": 0.695, + "args": { + "External id": 987257,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940376413.774, "dur": 29.246, + "args": { + "External id": 987258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940376415.379, "dur": 1.712, + "args": { + "External id": 987259,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940376418.625, "dur": 24.012, + "args": { + "External id": 987260,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940376424.233, "dur": 4.125, + "args": { + "External id": 987261,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940376444.947, "dur": 27.724, + "args": { + "External id": 987262,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940376474.745, "dur": 17.865, + "args": { + "External id": 987263,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940376502.501, "dur": 16.396, + "args": { + "External id": 987264,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940376520.409, "dur": 16.179, + "args": { + "External id": 987265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940376540.979, "dur": 24.935, + "args": { + "External id": 987266,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940376543.653, "dur": 1.686, + "args": { + "External id": 987267,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940376547.594, "dur": 0.820, + "args": { + "External id": 987268,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940376567.489, "dur": 16.618, + "args": { + "External id": 987269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940376585.416, "dur": 12.168, + "args": { + "External id": 987270,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940376608.220, "dur": 2.406, + "args": { + "External id": 987271,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940376624.188, "dur": 4.973, + "args": { + "External id": 987272,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940376627.380, "dur": 0.485, + "args": { + "External id": 987273,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940376708.117, "dur": 61.082, + "args": { + "External id": 987274,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940376775.309, "dur": 7.985, + "args": { + "External id": 987275,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940376778.792, "dur": 3.160, + "args": { + "External id": 987276,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940376785.250, "dur": 30.124, + "args": { + "External id": 987277,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940376820.752, "dur": 9.445, + "args": { + "External id": 987278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940376822.661, "dur": 6.657, + "args": { + "External id": 987279,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940376827.767, "dur": 1.306, + "args": { + "External id": 987280,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940376833.251, "dur": 46.756, + "args": { + "External id": 987281,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940376834.283, "dur": 44.858, + "args": { + "External id": 987282,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940376884.827, "dur": 16.427, + "args": { + "External id": 987283,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940376908.594, "dur": 5.005, + "args": { + "External id": 987284,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940376911.747, "dur": 0.787, + "args": { + "External id": 987285,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940376924.132, "dur": 51.126, + "args": { + "External id": 987286,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940376925.383, "dur": 4.541, + "args": { + "External id": 987287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940376926.285, "dur": 2.906, + "args": { + "External id": 987288,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940376928.261, "dur": 0.670, + "args": { + "External id": 987289,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940376930.759, "dur": 43.988, + "args": { + "External id": 987290,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940376931.680, "dur": 42.188, + "args": { + "External id": 987291,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940376982.329, "dur": 4.318, + "args": { + "External id": 987292,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940376984.655, "dur": 0.661, + "args": { + "External id": 987293,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940376992.946, "dur": 1.847, + "args": { + "External id": 987294,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940377003.618, "dur": 32.497, + "args": { + "External id": 987295,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940377005.666, "dur": 29.686, + "args": { + "External id": 987296,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940377189.275, "dur": 217.504, + "args": { + "External id": 987297,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940377192.661, "dur": 3.532, + "args": { + "External id": 987298,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940377198.342, "dur": 207.737, + "args": { + "External id": 987299,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940377200.552, "dur": 0.397, + "args": { + "External id": 987300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940377202.686, "dur": 28.798, + "args": { + "External id": 987301,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940377233.560, "dur": 3.849, + "args": { + "External id": 987302,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940377236.140, "dur": 0.864, + "args": { + "External id": 987303,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940377238.585, "dur": 29.771, + "args": { + "External id": 987304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940377239.959, "dur": 1.308, + "args": { + "External id": 987305,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940377242.949, "dur": 25.028, + "args": { + "External id": 987306,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940377251.120, "dur": 3.064, + "args": { + "External id": 987307,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940377270.171, "dur": 24.678, + "args": { + "External id": 987308,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940377296.378, "dur": 15.849, + "args": { + "External id": 987309,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940377315.469, "dur": 15.454, + "args": { + "External id": 987310,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940377332.545, "dur": 13.855, + "args": { + "External id": 987311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940377348.743, "dur": 22.984, + "args": { + "External id": 987312,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940377350.987, "dur": 1.717, + "args": { + "External id": 987313,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940377355.799, "dur": 0.700, + "args": { + "External id": 987314,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940377375.718, "dur": 13.400, + "args": { + "External id": 987315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940377390.580, "dur": 14.233, + "args": { + "External id": 987316,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940377415.343, "dur": 2.957, + "args": { + "External id": 987317,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940377429.294, "dur": 4.858, + "args": { + "External id": 987318,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940377432.783, "dur": 0.354, + "args": { + "External id": 987319,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940377513.280, "dur": 71.838, + "args": { + "External id": 987320,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940377591.174, "dur": 5.490, + "args": { + "External id": 987321,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940377594.391, "dur": 1.056, + "args": { + "External id": 987322,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940377598.299, "dur": 31.822, + "args": { + "External id": 987323,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940377637.988, "dur": 6.413, + "args": { + "External id": 987324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940377639.694, "dur": 3.732, + "args": { + "External id": 987325,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940377642.176, "dur": 0.996, + "args": { + "External id": 987326,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940377647.463, "dur": 47.818, + "args": { + "External id": 987327,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940377648.774, "dur": 45.795, + "args": { + "External id": 987328,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940377700.341, "dur": 18.929, + "args": { + "External id": 987329,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940377726.337, "dur": 7.425, + "args": { + "External id": 987330,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940377731.733, "dur": 0.864, + "args": { + "External id": 987331,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940377738.070, "dur": 59.351, + "args": { + "External id": 987332,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940377739.083, "dur": 6.421, + "args": { + "External id": 987333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940377739.945, "dur": 4.882, + "args": { + "External id": 987334,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940377741.758, "dur": 2.831, + "args": { + "External id": 987335,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940377746.381, "dur": 50.487, + "args": { + "External id": 987336,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940377750.241, "dur": 45.828, + "args": { + "External id": 987337,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940377802.938, "dur": 4.486, + "args": { + "External id": 987338,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940377805.514, "dur": 0.547, + "args": { + "External id": 987339,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940377813.635, "dur": 1.560, + "args": { + "External id": 987340,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940377823.964, "dur": 6.430, + "args": { + "External id": 987341,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940377826.447, "dur": 3.615, + "args": { + "External id": 987342,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940377929.779, "dur": 283.583, + "args": { + "External id": 987343,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940377932.498, "dur": 2.255, + "args": { + "External id": 987344,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940377936.228, "dur": 276.343, + "args": { + "External id": 987345,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940377937.680, "dur": 0.326, + "args": { + "External id": 987346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940377942.249, "dur": 21.785, + "args": { + "External id": 987347,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940377965.748, "dur": 3.749, + "args": { + "External id": 987348,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940377968.451, "dur": 0.857, + "args": { + "External id": 987349,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940377970.628, "dur": 28.523, + "args": { + "External id": 987350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940377971.755, "dur": 4.023, + "args": { + "External id": 987351,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940377977.291, "dur": 21.358, + "args": { + "External id": 987352,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940377982.513, "dur": 3.069, + "args": { + "External id": 987353,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940378000.786, "dur": 47.963, + "args": { + "External id": 987354,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378051.682, "dur": 61.427, + "args": { + "External id": 987355,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940378118.722, "dur": 18.351, + "args": { + "External id": 987356,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378138.939, "dur": 13.776, + "args": { + "External id": 987357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940378155.362, "dur": 24.787, + "args": { + "External id": 987358,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378158.230, "dur": 2.171, + "args": { + "External id": 987359,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940378163.249, "dur": 0.995, + "args": { + "External id": 987360,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378184.149, "dur": 13.473, + "args": { + "External id": 987361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378198.919, "dur": 11.992, + "args": { + "External id": 987362,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940378223.671, "dur": 3.168, + "args": { + "External id": 987363,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940378238.973, "dur": 5.295, + "args": { + "External id": 987364,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940378242.316, "dur": 0.916, + "args": { + "External id": 987365,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940378326.522, "dur": 70.648, + "args": { + "External id": 987366,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940378403.084, "dur": 7.608, + "args": { + "External id": 987367,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940378408.388, "dur": 1.049, + "args": { + "External id": 987368,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378412.340, "dur": 30.440, + "args": { + "External id": 987369,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940378448.330, "dur": 8.498, + "args": { + "External id": 987370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940378449.859, "dur": 5.929, + "args": { + "External id": 987371,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940378452.221, "dur": 3.227, + "args": { + "External id": 987372,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940378460.048, "dur": 46.952, + "args": { + "External id": 987373,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940378461.466, "dur": 44.589, + "args": { + "External id": 987374,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378513.882, "dur": 17.053, + "args": { + "External id": 987375,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940378538.182, "dur": 4.340, + "args": { + "External id": 987376,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940378540.897, "dur": 0.635, + "args": { + "External id": 987377,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940378547.257, "dur": 53.410, + "args": { + "External id": 987378,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940378548.410, "dur": 7.227, + "args": { + "External id": 987379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940378549.624, "dur": 5.256, + "args": { + "External id": 987380,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940378553.804, "dur": 0.874, + "args": { + "External id": 987381,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940378556.339, "dur": 43.850, + "args": { + "External id": 987382,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940378557.168, "dur": 42.345, + "args": { + "External id": 987383,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940378605.465, "dur": 4.490, + "args": { + "External id": 987384,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940378607.964, "dur": 0.752, + "args": { + "External id": 987385,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940378620.334, "dur": 1.873, + "args": { + "External id": 987386,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940378633.961, "dur": 12.383, + "args": { + "External id": 987387,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940378636.677, "dur": 9.316, + "args": { + "External id": 987388,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940378742.162, "dur": 210.825, + "args": { + "External id": 987389,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940378744.737, "dur": 2.176, + "args": { + "External id": 987390,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940378748.637, "dur": 203.620, + "args": { + "External id": 987391,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940378750.656, "dur": 0.343, + "args": { + "External id": 987392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940378752.664, "dur": 24.716, + "args": { + "External id": 987393,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940378781.578, "dur": 4.017, + "args": { + "External id": 987394,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940378784.577, "dur": 0.770, + "args": { + "External id": 987395,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940378786.920, "dur": 26.551, + "args": { + "External id": 987396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940378788.045, "dur": 1.733, + "args": { + "External id": 987397,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940378791.303, "dur": 21.821, + "args": { + "External id": 987398,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378797.102, "dur": 2.796, + "args": { + "External id": 987399,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940378815.142, "dur": 23.577, + "args": { + "External id": 987400,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378840.205, "dur": 15.876, + "args": { + "External id": 987401,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940378859.266, "dur": 15.099, + "args": { + "External id": 987402,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378876.036, "dur": 14.981, + "args": { + "External id": 987403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940378893.264, "dur": 26.632, + "args": { + "External id": 987404,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378898.059, "dur": 1.911, + "args": { + "External id": 987405,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940378902.667, "dur": 0.688, + "args": { + "External id": 987406,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378921.502, "dur": 15.070, + "args": { + "External id": 987407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940378937.672, "dur": 13.160, + "args": { + "External id": 987408,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940378959.842, "dur": 1.876, + "args": { + "External id": 987409,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940378971.585, "dur": 4.401, + "args": { + "External id": 987410,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940378974.383, "dur": 0.465, + "args": { + "External id": 987411,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940379112.301, "dur": 69.885, + "args": { + "External id": 987412,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940379191.506, "dur": 7.391, + "args": { + "External id": 987413,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379195.562, "dur": 1.363, + "args": { + "External id": 987414,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379200.256, "dur": 34.969, + "args": { + "External id": 987415,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940379240.738, "dur": 6.549, + "args": { + "External id": 987416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940379242.653, "dur": 3.661, + "args": { + "External id": 987417,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379245.273, "dur": 0.782, + "args": { + "External id": 987418,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940379252.975, "dur": 47.738, + "args": { + "External id": 987419,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940379253.923, "dur": 46.006, + "args": { + "External id": 987420,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379305.551, "dur": 17.270, + "args": { + "External id": 987421,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940379328.657, "dur": 30.983, + "args": { + "External id": 987422,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940379331.641, "dur": 27.401, + "args": { + "External id": 987423,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379340.784, "dur": 0.632, + "args": { + "External id": 987424,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940379366.268, "dur": 32.272, + "args": { + "External id": 987425,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940379368.386, "dur": 29.848, + "args": { + "External id": 987426,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 20326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379376.415, "dur": 4.405, + "args": { + "External id": 987427,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379382.315, "dur": 15.272, + "args": { + "External id": 987428,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940379413.246, "dur": 5.391, + "args": { + "External id": 987429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940379415.342, "dur": 2.978, + "args": { + "External id": 987430,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940379420.060, "dur": 1.324, + "args": { + "External id": 987431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940379420.744, "dur": 0.544, + "args": { + "External id": 987432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379476.736, "dur": 29.476, + "args": { + "External id": 987433,"Sequence number": 10552687, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379508.720, "dur": 15.980, + "args": { + "External id": 987434,"Sequence number": 10552688, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20334 + } + }, + { + "ph": "s", "id": 233, "pid": 2338708, "tid": 2338708, "ts": 6345940379508.720, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940379532.126, "dur": 7.017, + "args": { + "External id": 987435,"Sequence number": 10552689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], [], []], "Input Dims": [[8, 4, 4096], [], [], [], []], "Ev Idx": 20335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379536.165, "dur": 1.415, + "args": { + "External id": 987436,"Record function id": 0, "Concrete Inputs": ["", "[8, 4, 4096]", "[8192, 1, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 20336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345940379542.373, "dur": 7.214, + "args": { + "External id": 987437,"Sequence number": 10552689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "3"], "Input type": ["long int", "Scalar", "Scalar"], "Input Strides": [[8192, 1, 1], [], []], "Input Dims": [[8, 4, 4096], [], []], "Ev Idx": 20337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379547.573, "dur": 0.604, + "args": { + "External id": 987438,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1, 1], [], [], []], "Input Dims": [[8, 4, 4096], [], [], []], "Ev Idx": 20338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940379551.167, "dur": 5.323, + "args": { + "External id": 987439,"Sequence number": 10552689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], []], "Ev Idx": 20339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379555.290, "dur": 0.498, + "args": { + "External id": 987440,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096]", "[8192, 1]", "4"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[8, 4096], [], [], []], "Ev Idx": 20340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940379561.354, "dur": 6.648, + "args": { + "External id": 987441,"Sequence number": 10552689, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 20341 + } + }, + { + "ph": "s", "id": 232, "pid": 2338708, "tid": 2338708, "ts": 6345940379561.354, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379565.335, "dur": 1.013, + "args": { + "External id": 987442,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940379569.317, "dur": 5.309, + "args": { + "External id": 987443,"Sequence number": 10552690, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], [], []], "Ev Idx": 20343 + } + }, + { + "ph": "s", "id": 231, "pid": 2338708, "tid": 2338708, "ts": 6345940379569.317, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379572.978, "dur": 0.565, + "args": { + "External id": 987444,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4, 4096]", "[67108864, 16384, 4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 2338708, "tid": 2338708, + "ts": 6345940379577.974, "dur": 9.401, + "args": { + "External id": 987445,"Sequence number": 10552691, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "3"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], []], "Input Dims": [[8, 4096, 4, 4096], [], []], "Ev Idx": 20345 + } + }, + { + "ph": "s", "id": 230, "pid": 2338708, "tid": 2338708, "ts": 6345940379577.974, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379583.011, "dur": 3.235, + "args": { + "External id": 987446,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 4096, 1], [], [], []], "Input Dims": [[8, 4096, 4, 4096], [], [], []], "Ev Idx": 20346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940379588.703, "dur": 5.934, + "args": { + "External id": 987447,"Sequence number": 10552692, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "2", "0", "9223372036854775807", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], [], []], "Ev Idx": 20347 + } + }, + { + "ph": "s", "id": 229, "pid": 2338708, "tid": 2338708, "ts": 6345940379588.703, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379592.244, "dur": 1.411, + "args": { + "External id": 987448,"Record function id": 0, "Concrete Inputs": ["", "[8, 4096, 4096]", "[67108864, 16384, 1]", "12288"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[67108864, 16384, 1], [], [], []], "Input Dims": [[8, 4096, 4096], [], [], []], "Ev Idx": 20348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345940379599.063, "dur": 38.939, + "args": { + "External id": 987449,"Sequence number": 10552693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345940379601.074, "dur": 36.634, + "args": { + "External id": 987450,"Sequence number": 10552693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940379604.748, "dur": 7.869, + "args": { + "External id": 987451,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[8, 4096], [], [], [], [], []], "Ev Idx": 20351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940379607.730, "dur": 4.229, + "args": { + "External id": 987452,"Record function id": 0, "Concrete Inputs": ["[8, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379616.347, "dur": 20.818, + "args": { + "External id": 987453,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[8, 4096], [8, 4096], []], "Ev Idx": 20353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940379669.007, "dur": 5.141, + "args": { + "External id": 987454,"Sequence number": 10552693, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 4096]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[67108864, 16384, 1], []], "Input Dims": [[8, 4096, 4096], []], "Ev Idx": 20354 + } + }, + { + "ph": "s", "id": 228, "pid": 2338708, "tid": 2338708, "ts": 6345940379669.007, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940379677.051, "dur": 1.684, + "args": { + "External id": 987455,"Sequence number": 10552694, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[8, 4096], []], "Ev Idx": 20355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2338708, "tid": 2338708, + "ts": 6345940379718.176, "dur": 45918.668, + "args": { + "External id": 987456,"Sequence number": 10552694, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[16384, 1], [1], [4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [32768], [32000, 4096], [], [], [], [], []], "Ev Idx": 20356 + } + }, + { + "ph": "s", "id": 227, "pid": 2338708, "tid": 2338708, "ts": 6345940379718.176, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2338708, "tid": 2338708, + "ts": 6345940379737.420, "dur": 32.955, + "args": { + "External id": 987457,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345940379738.238, "dur": 31.812, + "args": { + "External id": 987458,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[16384, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940379739.978, "dur": 10.010, + "args": { + "External id": 987459,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "", "0"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[16384, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940379746.495, "dur": 3.035, + "args": { + "External id": 987460,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "15", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379751.333, "dur": 18.179, + "args": { + "External id": 987461,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [16384, 1], []], "Input Dims": [[32768, 4096], [32768, 4096], []], "Ev Idx": 20361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940379790.090, "dur": 29.144, + "args": { + "External id": 987462,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940379791.621, "dur": 7.934, + "args": { + "External id": 987463,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], [], []], "Ev Idx": 20363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379794.720, "dur": 4.195, + "args": { + "External id": 987464,"Record function id": 0, "Concrete Inputs": ["[32768, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379801.153, "dur": 17.808, + "args": { + "External id": 987465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379803.340, "dur": 15.177, + "args": { + "External id": 987466,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32768, 4096], []], "Ev Idx": 20366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940379823.412, "dur": 25.753, + "args": { + "External id": 987467,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940379824.352, "dur": 5.124, + "args": { + "External id": 987468,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379825.922, "dur": 3.239, + "args": { + "External id": 987469,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379832.607, "dur": 16.329, + "args": { + "External id": 987470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379833.186, "dur": 15.365, + "args": { + "External id": 987471,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[4096, 1], []], "Input Dims": [[32000, 4096], []], "Ev Idx": 20371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345940379856.338, "dur": 22.595, + "args": { + "External id": 987472,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 20372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940379858.291, "dur": 5.427, + "args": { + "External id": 987473,"Record function id": 0, "Concrete Inputs": ["[32768]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379864.538, "dur": 14.028, + "args": { + "External id": 987474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[32768]], "Ev Idx": 20374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379865.443, "dur": 12.745, + "args": { + "External id": 987475,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2338708, + "ts": 6345940379884.260, "dur": 26.670, + "args": { + "External id": 987476,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940379913.811, "dur": 58.362, + "args": { + "External id": 987477,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940379918.838, "dur": 52.788, + "args": { + "External id": 987478,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379925.898, "dur": 0.717, + "args": { + "External id": 987479,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940379927.955, "dur": 26.420, + "args": { + "External id": 987480,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940379929.517, "dur": 24.507, + "args": { + "External id": 987481,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[32768], [], [], [], [], [], []], "Ev Idx": 20381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940379932.545, "dur": 3.850, + "args": { + "External id": 987482,"Record function id": 0, "Concrete Inputs": ["[32768]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940379937.399, "dur": 16.145, + "args": { + "External id": 987483,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[32768], [32768], []], "Ev Idx": 20383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2338708, + "ts": 6345940379977.128, "dur": 38749.745, + "args": { + "External id": 987484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2338708, + "ts": 6345940379979.190, "dur": 38746.050, + "args": { + "External id": 987485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940418742.624, "dur": 9.438, + "args": { + "External id": 987486,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940418748.451, "dur": 1.252, + "args": { + "External id": 987487,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940418761.159, "dur": 115.057, + "args": { + "External id": 987488,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940418763.129, "dur": 7.134, + "args": { + "External id": 987489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940418765.706, "dur": 3.494, + "args": { + "External id": 987490,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940418767.984, "dur": 0.929, + "args": { + "External id": 987491,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940418771.701, "dur": 103.707, + "args": { + "External id": 987492,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940418773.567, "dur": 100.877, + "args": { + "External id": 987493,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940418880.393, "dur": 4.957, + "args": { + "External id": 987494,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940418883.007, "dur": 0.824, + "args": { + "External id": 987495,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940418897.375, "dur": 3.377, + "args": { + "External id": 987496,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940418912.467, "dur": 8.046, + "args": { + "External id": 987497,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940418915.182, "dur": 4.987, + "args": { + "External id": 987498,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940419133.374, "dur": 248.241, + "args": { + "External id": 987499,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940419138.768, "dur": 6.853, + "args": { + "External id": 987500,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940419150.558, "dur": 230.276, + "args": { + "External id": 987501,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940419153.052, "dur": 0.672, + "args": { + "External id": 987502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940419155.618, "dur": 33.310, + "args": { + "External id": 987503,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940419191.204, "dur": 4.514, + "args": { + "External id": 987504,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940419194.248, "dur": 1.108, + "args": { + "External id": 987505,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940419196.915, "dur": 30.449, + "args": { + "External id": 987506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940419198.733, "dur": 1.555, + "args": { + "External id": 987507,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940419201.950, "dur": 25.001, + "args": { + "External id": 987508,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940419208.960, "dur": 3.884, + "args": { + "External id": 987509,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940419232.137, "dur": 27.503, + "args": { + "External id": 987510,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940419262.431, "dur": 16.786, + "args": { + "External id": 987511,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940419283.211, "dur": 16.951, + "args": { + "External id": 987512,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940419301.964, "dur": 17.387, + "args": { + "External id": 987513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940419321.431, "dur": 25.132, + "args": { + "External id": 987514,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940419324.326, "dur": 1.960, + "args": { + "External id": 987515,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940419328.646, "dur": 0.768, + "args": { + "External id": 987516,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940419351.095, "dur": 14.042, + "args": { + "External id": 987517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940419366.871, "dur": 12.749, + "args": { + "External id": 987518,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940419391.914, "dur": 3.350, + "args": { + "External id": 987519,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940419404.506, "dur": 5.101, + "args": { + "External id": 987520,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940419407.954, "dur": 0.625, + "args": { + "External id": 987521,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940419506.567, "dur": 80.587, + "args": { + "External id": 987522,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940419594.534, "dur": 9.322, + "args": { + "External id": 987523,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "4096", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940419597.870, "dur": 1.311, + "args": { + "External id": 987524,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940419608.918, "dur": 32.740, + "args": { + "External id": 987525,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940419648.178, "dur": 7.531, + "args": { + "External id": 987526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940419650.255, "dur": 4.141, + "args": { + "External id": 987527,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940419652.855, "dur": 1.268, + "args": { + "External id": 987528,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940419659.504, "dur": 51.491, + "args": { + "External id": 987529,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940419660.950, "dur": 49.310, + "args": { + "External id": 987530,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940419716.536, "dur": 19.348, + "args": { + "External id": 987531,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940419746.244, "dur": 4.786, + "args": { + "External id": 987532,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940419748.933, "dur": 0.585, + "args": { + "External id": 987533,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940419756.460, "dur": 66.460, + "args": { + "External id": 987534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940419757.960, "dur": 8.720, + "args": { + "External id": 987535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940419758.845, "dur": 7.056, + "args": { + "External id": 987536,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940419760.620, "dur": 5.050, + "args": { + "External id": 987537,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940419774.826, "dur": 47.633, + "args": { + "External id": 987538,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940419775.837, "dur": 45.786, + "args": { + "External id": 987539,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940419828.417, "dur": 4.630, + "args": { + "External id": 987540,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940419830.704, "dur": 0.762, + "args": { + "External id": 987541,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940419840.570, "dur": 1.941, + "args": { + "External id": 987542,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940419852.489, "dur": 10.818, + "args": { + "External id": 987543,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940419857.471, "dur": 5.508, + "args": { + "External id": 987544,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940419977.887, "dur": 296.698, + "args": { + "External id": 987545,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940419982.888, "dur": 2.320, + "args": { + "External id": 987546,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940419986.714, "dur": 287.067, + "args": { + "External id": 987547,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940419988.364, "dur": 0.537, + "args": { + "External id": 987548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940419990.646, "dur": 43.738, + "args": { + "External id": 987549,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940420037.520, "dur": 6.858, + "args": { + "External id": 987550,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940420042.711, "dur": 1.230, + "args": { + "External id": 987551,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940420048.426, "dur": 68.114, + "args": { + "External id": 987552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940420049.874, "dur": 36.845, + "args": { + "External id": 987553,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940420090.072, "dur": 26.162, + "args": { + "External id": 987554,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940420094.418, "dur": 3.445, + "args": { + "External id": 987555,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940420118.518, "dur": 29.439, + "args": { + "External id": 987556,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940420150.038, "dur": 18.323, + "args": { + "External id": 987557,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940420172.141, "dur": 16.488, + "args": { + "External id": 987558,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940420190.525, "dur": 15.445, + "args": { + "External id": 987559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940420208.239, "dur": 32.451, + "args": { + "External id": 987560,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940420213.751, "dur": 2.537, + "args": { + "External id": 987561,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940420221.290, "dur": 0.895, + "args": { + "External id": 987562,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940420242.762, "dur": 15.083, + "args": { + "External id": 987563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940420259.263, "dur": 13.243, + "args": { + "External id": 987564,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940420285.258, "dur": 3.102, + "args": { + "External id": 987565,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940420300.496, "dur": 4.857, + "args": { + "External id": 987566,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940420303.587, "dur": 0.658, + "args": { + "External id": 987567,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "4096"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940420398.706, "dur": 72.782, + "args": { + "External id": 987568,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940420478.030, "dur": 8.118, + "args": { + "External id": 987569,"Record function id": 0, "Concrete Inputs": ["", "0", "4096", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940420484.146, "dur": 0.793, + "args": { + "External id": 987570,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940420487.803, "dur": 31.657, + "args": { + "External id": 987571,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940420525.532, "dur": 7.023, + "args": { + "External id": 987572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940420527.380, "dur": 4.245, + "args": { + "External id": 987573,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940420529.900, "dur": 1.539, + "args": { + "External id": 987574,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940420535.498, "dur": 53.134, + "args": { + "External id": 987575,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940420539.279, "dur": 48.674, + "args": { + "External id": 987576,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940420593.550, "dur": 19.403, + "args": { + "External id": 987577,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940420619.752, "dur": 4.696, + "args": { + "External id": 987578,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940420622.525, "dur": 0.864, + "args": { + "External id": 987579,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940420629.478, "dur": 57.144, + "args": { + "External id": 987580,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940420630.649, "dur": 7.283, + "args": { + "External id": 987581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940420631.679, "dur": 5.519, + "args": { + "External id": 987582,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940420636.237, "dur": 0.811, + "args": { + "External id": 987583,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940420638.800, "dur": 47.390, + "args": { + "External id": 987584,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940420640.092, "dur": 45.217, + "args": { + "External id": 987585,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940420691.493, "dur": 5.093, + "args": { + "External id": 987586,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940420694.269, "dur": 0.894, + "args": { + "External id": 987587,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940420703.151, "dur": 1.869, + "args": { + "External id": 987588,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940420714.880, "dur": 12.435, + "args": { + "External id": 987589,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940420719.898, "dur": 7.000, + "args": { + "External id": 987590,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940420837.390, "dur": 334.649, + "args": { + "External id": 987591,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940420843.227, "dur": 1.983, + "args": { + "External id": 987592,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940420847.347, "dur": 323.934, + "args": { + "External id": 987593,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940420848.845, "dur": 0.545, + "args": { + "External id": 987594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940420850.699, "dur": 25.337, + "args": { + "External id": 987595,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940420878.085, "dur": 6.332, + "args": { + "External id": 987596,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940420882.954, "dur": 1.194, + "args": { + "External id": 987597,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940420888.396, "dur": 27.403, + "args": { + "External id": 987598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940420889.624, "dur": 1.714, + "args": { + "External id": 987599,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940420893.034, "dur": 22.310, + "args": { + "External id": 987600,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940420895.824, "dur": 3.548, + "args": { + "External id": 987601,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940420917.460, "dur": 43.082, + "args": { + "External id": 987602,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940420962.620, "dur": 32.404, + "args": { + "External id": 987603,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940420998.729, "dur": 43.600, + "args": { + "External id": 987604,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940421045.332, "dur": 55.453, + "args": { + "External id": 987605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940421104.681, "dur": 31.942, + "args": { + "External id": 987606,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940421110.542, "dur": 2.740, + "args": { + "External id": 987607,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940421116.189, "dur": 1.059, + "args": { + "External id": 987608,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940421138.618, "dur": 14.925, + "args": { + "External id": 987609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940421155.092, "dur": 14.953, + "args": { + "External id": 987610,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940421181.988, "dur": 3.051, + "args": { + "External id": 987611,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940421197.423, "dur": 4.885, + "args": { + "External id": 987612,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940421200.570, "dur": 0.704, + "args": { + "External id": 987613,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940421291.271, "dur": 71.650, + "args": { + "External id": 987614,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940421369.469, "dur": 5.121, + "args": { + "External id": 987615,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "12288", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940421372.552, "dur": 0.718, + "args": { + "External id": 987616,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940421376.237, "dur": 29.042, + "args": { + "External id": 987617,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940421411.478, "dur": 9.379, + "args": { + "External id": 987618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940421413.822, "dur": 6.208, + "args": { + "External id": 987619,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940421418.574, "dur": 1.186, + "args": { + "External id": 987620,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940421426.702, "dur": 49.010, + "args": { + "External id": 987621,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940421428.212, "dur": 46.727, + "args": { + "External id": 987622,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940421480.464, "dur": 18.947, + "args": { + "External id": 987623,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940421507.051, "dur": 4.518, + "args": { + "External id": 987624,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940421509.792, "dur": 0.675, + "args": { + "External id": 987625,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940421517.144, "dur": 56.301, + "args": { + "External id": 987626,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940421520.761, "dur": 4.454, + "args": { + "External id": 987627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940421521.811, "dur": 2.690, + "args": { + "External id": 987628,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940421523.617, "dur": 0.720, + "args": { + "External id": 987629,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940421526.036, "dur": 47.037, + "args": { + "External id": 987630,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940421526.853, "dur": 45.559, + "args": { + "External id": 987631,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940421578.414, "dur": 6.671, + "args": { + "External id": 987632,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940421581.005, "dur": 2.589, + "args": { + "External id": 987633,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940421595.532, "dur": 1.737, + "args": { + "External id": 987634,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940421606.417, "dur": 9.651, + "args": { + "External id": 987635,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940421608.536, "dur": 7.256, + "args": { + "External id": 987636,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940421721.010, "dur": 227.630, + "args": { + "External id": 987637,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940421723.578, "dur": 2.561, + "args": { + "External id": 987638,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940421730.121, "dur": 217.686, + "args": { + "External id": 987639,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940421732.029, "dur": 0.353, + "args": { + "External id": 987640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940421733.886, "dur": 25.936, + "args": { + "External id": 987641,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940421761.900, "dur": 3.582, + "args": { + "External id": 987642,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940421764.325, "dur": 0.899, + "args": { + "External id": 987643,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940421766.697, "dur": 26.508, + "args": { + "External id": 987644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940421768.196, "dur": 1.946, + "args": { + "External id": 987645,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940421771.628, "dur": 21.200, + "args": { + "External id": 987646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940421776.665, "dur": 2.664, + "args": { + "External id": 987647,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940421794.769, "dur": 22.498, + "args": { + "External id": 987648,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940421836.364, "dur": 16.210, + "args": { + "External id": 987649,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940421858.375, "dur": 13.862, + "args": { + "External id": 987650,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940421873.633, "dur": 15.304, + "args": { + "External id": 987651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940421890.969, "dur": 23.935, + "args": { + "External id": 987652,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940421893.438, "dur": 1.848, + "args": { + "External id": 987653,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940421897.224, "dur": 0.563, + "args": { + "External id": 987654,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940421916.443, "dur": 14.547, + "args": { + "External id": 987655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940421935.092, "dur": 11.468, + "args": { + "External id": 987656,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940421956.660, "dur": 1.847, + "args": { + "External id": 987657,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940421968.623, "dur": 3.841, + "args": { + "External id": 987658,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940421971.105, "dur": 0.394, + "args": { + "External id": 987659,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "12288"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940422114.072, "dur": 74.189, + "args": { + "External id": 987660,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940422195.806, "dur": 9.565, + "args": { + "External id": 987661,"Record function id": 0, "Concrete Inputs": ["", "0", "12288", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940422199.967, "dur": 3.452, + "args": { + "External id": 987662,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940422207.049, "dur": 31.247, + "args": { + "External id": 987663,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940422244.964, "dur": 9.313, + "args": { + "External id": 987664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940422249.803, "dur": 3.446, + "args": { + "External id": 987665,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940422252.076, "dur": 0.823, + "args": { + "External id": 987666,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940422257.409, "dur": 50.049, + "args": { + "External id": 987667,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940422258.515, "dur": 48.214, + "args": { + "External id": 987668,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940422312.909, "dur": 19.349, + "args": { + "External id": 987669,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940422339.501, "dur": 7.483, + "args": { + "External id": 987670,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940422345.110, "dur": 0.695, + "args": { + "External id": 987671,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940422352.419, "dur": 54.952, + "args": { + "External id": 987672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940422353.439, "dur": 3.856, + "args": { + "External id": 987673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940422354.230, "dur": 2.341, + "args": { + "External id": 987674,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940422355.708, "dur": 0.682, + "args": { + "External id": 987675,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940422358.136, "dur": 48.797, + "args": { + "External id": 987676,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940422358.665, "dur": 47.610, + "args": { + "External id": 987677,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940422413.673, "dur": 4.765, + "args": { + "External id": 987678,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940422416.203, "dur": 0.644, + "args": { + "External id": 987679,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940422429.098, "dur": 1.927, + "args": { + "External id": 987680,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940422441.302, "dur": 7.869, + "args": { + "External id": 987681,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940422443.627, "dur": 5.230, + "args": { + "External id": 987682,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940422564.865, "dur": 231.468, + "args": { + "External id": 987683,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940422567.009, "dur": 4.289, + "args": { + "External id": 987684,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940422575.784, "dur": 219.729, + "args": { + "External id": 987685,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940422577.345, "dur": 0.400, + "args": { + "External id": 987686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940422579.568, "dur": 25.777, + "args": { + "External id": 987687,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940422607.455, "dur": 6.550, + "args": { + "External id": 987688,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940422612.989, "dur": 0.773, + "args": { + "External id": 987689,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940422615.209, "dur": 26.675, + "args": { + "External id": 987690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940422616.332, "dur": 1.235, + "args": { + "External id": 987691,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940422619.149, "dur": 22.208, + "args": { + "External id": 987692,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940422622.041, "dur": 4.325, + "args": { + "External id": 987693,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940422645.954, "dur": 28.306, + "args": { + "External id": 987694,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940422676.119, "dur": 15.970, + "args": { + "External id": 987695,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940422695.626, "dur": 17.912, + "args": { + "External id": 987696,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940422715.534, "dur": 14.752, + "args": { + "External id": 987697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940422732.724, "dur": 27.358, + "args": { + "External id": 987698,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940422735.272, "dur": 1.798, + "args": { + "External id": 987699,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940422739.861, "dur": 2.528, + "args": { + "External id": 987700,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940422764.571, "dur": 14.608, + "args": { + "External id": 987701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940422780.359, "dur": 13.974, + "args": { + "External id": 987702,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940422804.435, "dur": 2.264, + "args": { + "External id": 987703,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940422817.942, "dur": 4.722, + "args": { + "External id": 987704,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940422821.139, "dur": 0.563, + "args": { + "External id": 987705,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940422903.053, "dur": 61.900, + "args": { + "External id": 987706,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940422970.652, "dur": 5.381, + "args": { + "External id": 987707,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "20480", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940422973.858, "dur": 0.878, + "args": { + "External id": 987708,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940422980.250, "dur": 48.440, + "args": { + "External id": 987709,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940423037.242, "dur": 17.062, + "args": { + "External id": 987710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940423045.965, "dur": 7.178, + "args": { + "External id": 987711,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940423049.673, "dur": 3.139, + "args": { + "External id": 987712,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940423096.100, "dur": 59.640, + "args": { + "External id": 987713,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940423097.500, "dur": 57.109, + "args": { + "External id": 987714,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940423165.121, "dur": 20.999, + "args": { + "External id": 987715,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940423194.634, "dur": 5.614, + "args": { + "External id": 987716,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940423197.993, "dur": 0.859, + "args": { + "External id": 987717,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940423205.746, "dur": 56.394, + "args": { + "External id": 987718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940423206.980, "dur": 7.740, + "args": { + "External id": 987719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940423208.032, "dur": 5.863, + "args": { + "External id": 987720,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940423212.633, "dur": 1.093, + "args": { + "External id": 987721,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940423215.373, "dur": 46.287, + "args": { + "External id": 987722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940423216.144, "dur": 44.836, + "args": { + "External id": 987723,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940423267.015, "dur": 4.562, + "args": { + "External id": 987724,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940423269.653, "dur": 0.549, + "args": { + "External id": 987725,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940423279.842, "dur": 2.112, + "args": { + "External id": 987726,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940423294.500, "dur": 11.134, + "args": { + "External id": 987727,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940423297.177, "dur": 8.197, + "args": { + "External id": 987728,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940423416.672, "dur": 223.403, + "args": { + "External id": 987729,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940423419.348, "dur": 1.912, + "args": { + "External id": 987730,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940423425.323, "dur": 213.846, + "args": { + "External id": 987731,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940423426.809, "dur": 0.603, + "args": { + "External id": 987732,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940423429.088, "dur": 27.212, + "args": { + "External id": 987733,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940423460.589, "dur": 5.842, + "args": { + "External id": 987734,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940423463.359, "dur": 2.696, + "args": { + "External id": 987735,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940423467.620, "dur": 24.959, + "args": { + "External id": 987736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940423468.880, "dur": 1.130, + "args": { + "External id": 987737,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940423471.694, "dur": 20.407, + "args": { + "External id": 987738,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940423474.881, "dur": 2.951, + "args": { + "External id": 987739,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940423494.240, "dur": 24.475, + "args": { + "External id": 987740,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940423520.420, "dur": 17.753, + "args": { + "External id": 987741,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940423541.395, "dur": 15.898, + "args": { + "External id": 987742,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940423558.803, "dur": 16.048, + "args": { + "External id": 987743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940423576.982, "dur": 29.481, + "args": { + "External id": 987744,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940423581.828, "dur": 1.938, + "args": { + "External id": 987745,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940423586.568, "dur": 2.703, + "args": { + "External id": 987746,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940423608.569, "dur": 16.044, + "args": { + "External id": 987747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940423625.905, "dur": 11.683, + "args": { + "External id": 987748,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940423648.019, "dur": 1.766, + "args": { + "External id": 987749,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940423659.755, "dur": 4.468, + "args": { + "External id": 987750,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940423662.913, "dur": 0.436, + "args": { + "External id": 987751,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "20480"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940423743.134, "dur": 58.400, + "args": { + "External id": 987752,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940423809.634, "dur": 5.235, + "args": { + "External id": 987753,"Record function id": 0, "Concrete Inputs": ["", "0", "20480", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940423812.732, "dur": 0.729, + "args": { + "External id": 987754,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940423816.299, "dur": 28.034, + "args": { + "External id": 987755,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940423849.834, "dur": 6.506, + "args": { + "External id": 987756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940423851.350, "dur": 3.890, + "args": { + "External id": 987757,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940423853.706, "dur": 1.309, + "args": { + "External id": 987758,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940423862.477, "dur": 47.139, + "args": { + "External id": 987759,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940423863.537, "dur": 45.352, + "args": { + "External id": 987760,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940423914.352, "dur": 17.912, + "args": { + "External id": 987761,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940423938.489, "dur": 4.714, + "args": { + "External id": 987762,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940423941.212, "dur": 0.807, + "args": { + "External id": 987763,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940423947.504, "dur": 50.795, + "args": { + "External id": 987764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940423948.469, "dur": 6.519, + "args": { + "External id": 987765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940423952.269, "dur": 2.097, + "args": { + "External id": 987766,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940423953.725, "dur": 0.489, + "args": { + "External id": 987767,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940423955.626, "dur": 42.078, + "args": { + "External id": 987768,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940423956.433, "dur": 40.574, + "args": { + "External id": 987769,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940424003.234, "dur": 23.752, + "args": { + "External id": 987770,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940424005.664, "dur": 19.064, + "args": { + "External id": 987771,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940424037.642, "dur": 2.006, + "args": { + "External id": 987772,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940424051.980, "dur": 45.501, + "args": { + "External id": 987773,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940424054.470, "dur": 42.108, + "args": { + "External id": 987774,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940424207.971, "dur": 224.550, + "args": { + "External id": 987775,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940424213.457, "dur": 2.512, + "args": { + "External id": 987776,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940424217.605, "dur": 214.044, + "args": { + "External id": 987777,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940424219.355, "dur": 0.363, + "args": { + "External id": 987778,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940424221.374, "dur": 24.326, + "args": { + "External id": 987779,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940424250.324, "dur": 6.701, + "args": { + "External id": 987780,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940424255.624, "dur": 0.975, + "args": { + "External id": 987781,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940424258.344, "dur": 24.549, + "args": { + "External id": 987782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940424259.420, "dur": 1.508, + "args": { + "External id": 987783,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940424262.522, "dur": 19.872, + "args": { + "External id": 987784,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940424265.452, "dur": 3.600, + "args": { + "External id": 987785,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940424284.461, "dur": 24.093, + "args": { + "External id": 987786,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940424310.251, "dur": 15.886, + "args": { + "External id": 987787,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940424329.606, "dur": 17.147, + "args": { + "External id": 987788,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940424351.073, "dur": 15.143, + "args": { + "External id": 987789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940424371.198, "dur": 27.264, + "args": { + "External id": 987790,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940424376.176, "dur": 1.466, + "args": { + "External id": 987791,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940424380.952, "dur": 0.785, + "args": { + "External id": 987792,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940424400.266, "dur": 14.704, + "args": { + "External id": 987793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940424416.321, "dur": 14.282, + "args": { + "External id": 987794,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940424440.851, "dur": 2.407, + "args": { + "External id": 987795,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940424457.298, "dur": 4.726, + "args": { + "External id": 987796,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940424460.500, "dur": 0.437, + "args": { + "External id": 987797,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940424537.085, "dur": 67.580, + "args": { + "External id": 987798,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940424610.280, "dur": 5.505, + "args": { + "External id": 987799,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "28672", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940424613.703, "dur": 0.655, + "args": { + "External id": 987800,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940424617.212, "dur": 28.180, + "args": { + "External id": 987801,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940424650.975, "dur": 8.439, + "args": { + "External id": 987802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940424652.738, "dur": 5.828, + "args": { + "External id": 987803,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940424657.255, "dur": 1.133, + "args": { + "External id": 987804,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940424662.434, "dur": 49.333, + "args": { + "External id": 987805,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940424663.450, "dur": 47.580, + "args": { + "External id": 987806,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940424717.005, "dur": 16.860, + "args": { + "External id": 987807,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940424740.948, "dur": 4.267, + "args": { + "External id": 987808,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940424743.535, "dur": 0.573, + "args": { + "External id": 987809,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2338708, "tid": 2338708, + "ts": 6345940424752.269, "dur": 48.716, + "args": { + "External id": 987810,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [32000, 4096], []], "Ev Idx": 20710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940424753.243, "dur": 3.790, + "args": { + "External id": 987811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940424754.039, "dur": 2.309, + "args": { + "External id": 987812,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[32000, 4096], [], []], "Ev Idx": 20712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940424755.661, "dur": 0.532, + "args": { + "External id": 987813,"Record function id": 0, "Concrete Inputs": ["", "[4096, 32000]", "[1, 4096]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32000, 4096], [], [], []], "Ev Idx": 20713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940424757.930, "dur": 42.661, + "args": { + "External id": 987814,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940424758.487, "dur": 41.451, + "args": { + "External id": 987815,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[4096, 1], [1, 4096]], "Input Dims": [[4096, 4096], [4096, 32000]], "Ev Idx": 20715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940424808.080, "dur": 4.398, + "args": { + "External id": 987816,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940424810.528, "dur": 0.565, + "args": { + "External id": 987817,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940424818.781, "dur": 1.518, + "args": { + "External id": 987818,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[4096, 32000], []], "Ev Idx": 20718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940424828.685, "dur": 6.555, + "args": { + "External id": 987819,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[4096, 32000], [], [], [], [], []], "Ev Idx": 20719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940424830.953, "dur": 3.932, + "args": { + "External id": 987820,"Record function id": 0, "Concrete Inputs": ["[4096, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940424927.868, "dur": 270.549, + "args": { + "External id": 987821,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940424932.548, "dur": 3.297, + "args": { + "External id": 987822,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2338708, "tid": 2338708, + "ts": 6345940424937.728, "dur": 259.813, + "args": { + "External id": 987823,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[4096, 1], [], [], [0]], "Ev Idx": 20723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2338708, "tid": 2338708, + "ts": 6345940424939.048, "dur": 0.313, + "args": { + "External id": 987824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2338708, "tid": 2338708, + "ts": 6345940424941.508, "dur": 21.540, + "args": { + "External id": 987825,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[4096, 1], [], []], "Ev Idx": 20725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2338708, "tid": 2338708, + "ts": 6345940424964.679, "dur": 4.941, + "args": { + "External id": 987826,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[4096, 1], []], "Ev Idx": 20726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940424968.705, "dur": 0.661, + "args": { + "External id": 987827,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[4096, 1], [], [], []], "Ev Idx": 20727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940424970.701, "dur": 25.319, + "args": { + "External id": 987828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345940424971.846, "dur": 1.207, + "args": { + "External id": 987829,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345940424974.367, "dur": 21.311, + "args": { + "External id": 987830,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[4096], [0]], "Ev Idx": 20730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940424980.244, "dur": 2.666, + "args": { + "External id": 987831,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345940424997.529, "dur": 43.581, + "args": { + "External id": 987832,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940425043.941, "dur": 54.271, + "args": { + "External id": 987833,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2338708, "tid": 2338708, + "ts": 6345940425102.933, "dur": 17.273, + "args": { + "External id": 987834,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[4096, 1], [4096, 1], []], "Ev Idx": 20734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2338708, "tid": 2338708, + "ts": 6345940425121.749, "dur": 14.300, + "args": { + "External id": 987835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[4096, 1]], "Ev Idx": 20735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940425138.143, "dur": 25.218, + "args": { + "External id": 987836,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[4096, 1], [], [], [], [0]], "Ev Idx": 20736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345940425140.463, "dur": 2.235, + "args": { + "External id": 987837,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 20737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940425145.487, "dur": 0.739, + "args": { + "External id": 987838,"Record function id": 0, "Concrete Inputs": ["", "[4096, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 20738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2338708, "tid": 2338708, + "ts": 6345940425167.472, "dur": 14.796, + "args": { + "External id": 987839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[4096]], "Ev Idx": 20739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940425183.426, "dur": 12.830, + "args": { + "External id": 987840,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[4096], [4096], []], "Ev Idx": 20740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345940425208.792, "dur": 2.588, + "args": { + "External id": 987841,"Record function id": 0, "Concrete Inputs": ["", "[4096]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 20741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940425222.855, "dur": 4.874, + "args": { + "External id": 987842,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[32768], [], [], [], []], "Ev Idx": 20742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940425226.340, "dur": 0.479, + "args": { + "External id": 987843,"Record function id": 0, "Concrete Inputs": ["", "[4096]", "[1]", "28672"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940425310.483, "dur": 65.671, + "args": { + "External id": 987844,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [4096, 1]], "Input Dims": [[4096, 32000], [32000, 4096]], "Ev Idx": 20744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2338708, "tid": 2338708, + "ts": 6345940425382.032, "dur": 4.813, + "args": { + "External id": 987845,"Record function id": 0, "Concrete Inputs": ["", "0", "28672", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[32768, 4096], [], [], [], []], "Ev Idx": 20745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940425384.747, "dur": 0.787, + "args": { + "External id": 987846,"Record function id": 0, "Concrete Inputs": ["", "[4096, 4096]", "[4096, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[32768, 4096], [], [], []], "Ev Idx": 20746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940425388.763, "dur": 28.429, + "args": { + "External id": 987847,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[4096, 4096], [4096, 4096], []], "Ev Idx": 20747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2338708, "tid": 2338708, + "ts": 6345940425425.081, "dur": 6.992, + "args": { + "External id": 987848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[4096, 32000]], "Ev Idx": 20748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2338708, "tid": 2338708, + "ts": 6345940425427.107, "dur": 4.132, + "args": { + "External id": 987849,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[4096, 32000], [], []], "Ev Idx": 20749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940425429.494, "dur": 1.483, + "args": { + "External id": 987850,"Record function id": 0, "Concrete Inputs": ["", "[32000, 4096]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[4096, 32000], [], [], []], "Ev Idx": 20750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2338708, "tid": 2338708, + "ts": 6345940425435.341, "dur": 46.280, + "args": { + "External id": 987851,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2338708, "tid": 2338708, + "ts": 6345940425436.485, "dur": 44.426, + "args": { + "External id": 987852,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [4096, 1]], "Input Dims": [[32000, 4096], [4096, 4096]], "Ev Idx": 20752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940425486.297, "dur": 17.064, + "args": { + "External id": 987853,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940425508.567, "dur": 34.127, + "args": { + "External id": 987854,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[32768], []], "Ev Idx": 20754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2338708, "tid": 2338708, + "ts": 6345940425513.726, "dur": 28.481, + "args": { + "External id": 987855,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[32768], [], [], []], "Ev Idx": 20755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940425522.749, "dur": 0.665, + "args": { + "External id": 987856,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 20756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345940425548.903, "dur": 32.403, + "args": { + "External id": 987857,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], []], "Ev Idx": 20757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2338708, "tid": 2338708, + "ts": 6345940425551.015, "dur": 30.007, + "args": { + "External id": 987858,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[32000, 4096], [], [], [], [], [], []], "Ev Idx": 20758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940425556.986, "dur": 3.941, + "args": { + "External id": 987859,"Record function id": 0, "Concrete Inputs": ["[32000, 4096]", "[4096, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345940425562.418, "dur": 18.019, + "args": { + "External id": 987860,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[32000, 4096], [32000, 4096], []], "Ev Idx": 20760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940425596.229, "dur": 5.669, + "args": { + "External id": 987861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940425598.480, "dur": 3.073, + "args": { + "External id": 987862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32768, 4096]], "Ev Idx": 20762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940425603.601, "dur": 3.766, + "args": { + "External id": 987863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2338708, "tid": 2338708, + "ts": 6345940425606.498, "dur": 0.775, + "args": { + "External id": 987864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[4096, 1]], "Input Dims": [[32000, 4096]], "Ev Idx": 20764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940425663.737, "dur": 26.410, + "args": { + "External id": 987865,"Sequence number": 10552695, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2338708, "tid": 2338708, + "ts": 6345940425692.752, "dur": 16.967, + "args": { + "External id": 987866,"Sequence number": 10552696, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20766 + } + }, + { + "ph": "s", "id": 226, "pid": 2338708, "tid": 2338708, "ts": 6345940425692.752, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2338708, "tid": 2338708, + "ts": 6345940425845.610, "dur": 49.694, + "args": { + "External id": 987867,"Record function id": 0, "Ev Idx": 20767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338708, "tid": 2338708, + "ts": 6345940426050.181, "dur": 90.275, + "args": { + "External id": 987868,"Sequence number": 10552697, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20768 + } + }, + { + "ph": "s", "id": 225, "pid": 2338708, "tid": 2338708, "ts": 6345940426050.181, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940426184.579, "dur": 34.234, + "args": { + "External id": 987869,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 20769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345940426186.770, "dur": 11.930, + "args": { + "External id": 987870,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[1], [], [], [], [], []], "Ev Idx": 20770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345940426192.799, "dur": 5.141, + "args": { + "External id": 987871,"Record function id": 0, "Concrete Inputs": ["[1]", "[1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345940426200.797, "dur": 17.583, + "args": { + "External id": 987872,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338708, "tid": 2338708, + "ts": 6345942513331.094, "dur": 72.326, + "args": { + "External id": 987873,"Sequence number": 10552698, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 20773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2338708, "tid": 2338708, + "ts": 6345942513412.165, "dur": 36.092, + "args": { + "External id": 987874,"Sequence number": 10552699, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[1], []], "Input Dims": [[1], []], "Ev Idx": 20774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345942513640.142, "dur": 29.498, + "args": { + "External id": 987875,"Sequence number": 10552700, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345942513674.485, "dur": 19.645, + "args": { + "External id": 987876,"Sequence number": 10552701, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[1], [1], []], "Ev Idx": 20776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345942514887.192, "dur": 53.037, + "args": { + "External id": 987877,"Sequence number": 10552702, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345942514945.143, "dur": 17.843, + "args": { + "External id": 987878,"Sequence number": 10552703, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345942514975.056, "dur": 18.610, + "args": { + "External id": 987879,"Sequence number": 10552704, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[1], [], []], "Ev Idx": 20779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345942514995.982, "dur": 32.107, + "args": { + "External id": 987880,"Sequence number": 10552705, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[1], [1], []], "Ev Idx": 20780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2338708, "tid": 2338708, + "ts": 6345942517658.437, "dur": 3784.357, + "args": { + "External id": 987881,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2338708, "tid": 2338708, + "ts": 6345942518317.378, "dur": 1452.523, + "args": { + "External id": 987882,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 20782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2338708, "tid": 2338708, + "ts": 6345942518343.470, "dur": 85.026, + "args": { + "External id": 987883,"Record function id": 0, "Concrete Inputs": ["[68250]", "6", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 20783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345942518347.837, "dur": 15.252, + "args": { + "External id": 987884,"Record function id": 0, "Concrete Inputs": ["[68250]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 20784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2338708, "tid": 2338708, + "ts": 6345942518369.246, "dur": 58.877, + "args": { + "External id": 987885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[68250]], "Ev Idx": 20785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2338708, "tid": 2338708, + "ts": 6345942518374.059, "dur": 53.374, + "args": { + "External id": 987886,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[68250], []], "Ev Idx": 20786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521482.684, "dur": 4.653, + "args": { + "External id": 987887,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521489.938, "dur": 0.390, + "args": { + "External id": 987888,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521491.913, "dur": 0.433, + "args": { + "External id": 987889,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521493.803, "dur": 0.432, + "args": { + "External id": 987890,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521495.890, "dur": 0.220, + "args": { + "External id": 987891,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521497.623, "dur": 0.323, + "args": { + "External id": 987892,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521499.700, "dur": 0.363, + "args": { + "External id": 987893,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521503.830, "dur": 0.412, + "args": { + "External id": 987894,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521505.582, "dur": 0.540, + "args": { + "External id": 987895,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521507.284, "dur": 0.446, + "args": { + "External id": 987896,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521508.775, "dur": 0.253, + "args": { + "External id": 987897,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521510.252, "dur": 0.278, + "args": { + "External id": 987898,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521511.711, "dur": 0.303, + "args": { + "External id": 987899,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521513.266, "dur": 0.290, + "args": { + "External id": 987900,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521514.656, "dur": 0.407, + "args": { + "External id": 987901,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521518.734, "dur": 0.264, + "args": { + "External id": 987902,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521519.902, "dur": 0.448, + "args": { + "External id": 987903,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521521.650, "dur": 0.405, + "args": { + "External id": 987904,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521523.189, "dur": 0.287, + "args": { + "External id": 987905,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521524.418, "dur": 0.253, + "args": { + "External id": 987906,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521525.829, "dur": 0.248, + "args": { + "External id": 987907,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521527.337, "dur": 0.265, + "args": { + "External id": 987908,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521529.058, "dur": 0.284, + "args": { + "External id": 987909,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521532.830, "dur": 0.272, + "args": { + "External id": 987910,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521534.317, "dur": 0.248, + "args": { + "External id": 987911,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521536.057, "dur": 0.273, + "args": { + "External id": 987912,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521537.470, "dur": 0.241, + "args": { + "External id": 987913,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521538.802, "dur": 0.446, + "args": { + "External id": 987914,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521540.185, "dur": 0.251, + "args": { + "External id": 987915,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521541.523, "dur": 0.262, + "args": { + "External id": 987916,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521542.793, "dur": 0.375, + "args": { + "External id": 987917,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521546.706, "dur": 0.319, + "args": { + "External id": 987918,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521547.878, "dur": 0.401, + "args": { + "External id": 987919,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521549.416, "dur": 0.428, + "args": { + "External id": 987920,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521551.037, "dur": 0.318, + "args": { + "External id": 987921,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521552.233, "dur": 0.408, + "args": { + "External id": 987922,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521553.775, "dur": 0.267, + "args": { + "External id": 987923,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521554.894, "dur": 0.390, + "args": { + "External id": 987924,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521556.385, "dur": 0.259, + "args": { + "External id": 987925,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521559.981, "dur": 0.430, + "args": { + "External id": 987926,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521561.550, "dur": 0.359, + "args": { + "External id": 987927,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521563.439, "dur": 0.431, + "args": { + "External id": 987928,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521566.752, "dur": 0.262, + "args": { + "External id": 987929,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521567.922, "dur": 0.368, + "args": { + "External id": 987930,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521569.398, "dur": 0.318, + "args": { + "External id": 987931,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521570.723, "dur": 0.260, + "args": { + "External id": 987932,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521571.873, "dur": 0.385, + "args": { + "External id": 987933,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521575.594, "dur": 0.337, + "args": { + "External id": 987934,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521576.928, "dur": 0.242, + "args": { + "External id": 987935,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521578.031, "dur": 0.252, + "args": { + "External id": 987936,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521579.196, "dur": 0.244, + "args": { + "External id": 987937,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521580.283, "dur": 0.251, + "args": { + "External id": 987938,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521581.450, "dur": 0.247, + "args": { + "External id": 987939,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521582.765, "dur": 0.251, + "args": { + "External id": 987940,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521584.077, "dur": 0.245, + "args": { + "External id": 987941,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521587.637, "dur": 0.254, + "args": { + "External id": 987942,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521588.945, "dur": 0.246, + "args": { + "External id": 987943,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521590.216, "dur": 0.270, + "args": { + "External id": 987944,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521591.726, "dur": 0.245, + "args": { + "External id": 987945,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521593.027, "dur": 0.439, + "args": { + "External id": 987946,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521594.437, "dur": 0.371, + "args": { + "External id": 987947,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521595.693, "dur": 0.350, + "args": { + "External id": 987948,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521596.985, "dur": 0.392, + "args": { + "External id": 987949,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521600.595, "dur": 0.397, + "args": { + "External id": 987950,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521602.050, "dur": 0.387, + "args": { + "External id": 987951,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521603.749, "dur": 0.381, + "args": { + "External id": 987952,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521605.188, "dur": 0.246, + "args": { + "External id": 987953,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521606.496, "dur": 0.380, + "args": { + "External id": 987954,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521608.074, "dur": 0.265, + "args": { + "External id": 987955,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521609.534, "dur": 0.252, + "args": { + "External id": 987956,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521610.670, "dur": 0.255, + "args": { + "External id": 987957,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521613.937, "dur": 0.252, + "args": { + "External id": 987958,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521615.073, "dur": 0.244, + "args": { + "External id": 987959,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521616.171, "dur": 0.253, + "args": { + "External id": 987960,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521617.308, "dur": 0.249, + "args": { + "External id": 987961,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521618.463, "dur": 0.256, + "args": { + "External id": 987962,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521619.790, "dur": 0.258, + "args": { + "External id": 987963,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521621.936, "dur": 0.261, + "args": { + "External id": 987964,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521623.211, "dur": 0.246, + "args": { + "External id": 987965,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521626.672, "dur": 0.255, + "args": { + "External id": 987966,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521627.888, "dur": 0.248, + "args": { + "External id": 987967,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521629.007, "dur": 0.252, + "args": { + "External id": 987968,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521630.120, "dur": 0.243, + "args": { + "External id": 987969,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521631.334, "dur": 0.249, + "args": { + "External id": 987970,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521632.645, "dur": 0.294, + "args": { + "External id": 987971,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521633.839, "dur": 0.248, + "args": { + "External id": 987972,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521634.994, "dur": 0.242, + "args": { + "External id": 987973,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521638.594, "dur": 0.252, + "args": { + "External id": 987974,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521639.743, "dur": 0.243, + "args": { + "External id": 987975,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521641.077, "dur": 0.253, + "args": { + "External id": 987976,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521642.199, "dur": 0.264, + "args": { + "External id": 987977,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521643.316, "dur": 0.254, + "args": { + "External id": 987978,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521644.462, "dur": 0.361, + "args": { + "External id": 987979,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521646.309, "dur": 0.380, + "args": { + "External id": 987980,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521647.856, "dur": 0.381, + "args": { + "External id": 987981,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521651.269, "dur": 0.247, + "args": { + "External id": 987982,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521652.530, "dur": 0.403, + "args": { + "External id": 987983,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521654.215, "dur": 0.268, + "args": { + "External id": 987984,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521655.656, "dur": 0.269, + "args": { + "External id": 987985,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521656.985, "dur": 0.360, + "args": { + "External id": 987986,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521658.550, "dur": 0.381, + "args": { + "External id": 987987,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521659.959, "dur": 0.258, + "args": { + "External id": 987988,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521661.129, "dur": 0.244, + "args": { + "External id": 987989,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521665.134, "dur": 0.459, + "args": { + "External id": 987990,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521666.570, "dur": 0.244, + "args": { + "External id": 987991,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521667.699, "dur": 0.264, + "args": { + "External id": 987992,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521668.897, "dur": 0.245, + "args": { + "External id": 987993,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521670.133, "dur": 0.250, + "args": { + "External id": 987994,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521671.373, "dur": 0.243, + "args": { + "External id": 987995,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521673.318, "dur": 0.249, + "args": { + "External id": 987996,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521674.823, "dur": 0.411, + "args": { + "External id": 987997,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521678.423, "dur": 0.434, + "args": { + "External id": 987998,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521679.853, "dur": 0.460, + "args": { + "External id": 987999,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521681.391, "dur": 0.386, + "args": { + "External id": 988000,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521682.905, "dur": 0.245, + "args": { + "External id": 988001,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521684.224, "dur": 0.391, + "args": { + "External id": 988002,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521685.657, "dur": 0.392, + "args": { + "External id": 988003,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521687.126, "dur": 0.369, + "args": { + "External id": 988004,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521688.453, "dur": 0.363, + "args": { + "External id": 988005,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521692.174, "dur": 0.254, + "args": { + "External id": 988006,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521693.395, "dur": 0.244, + "args": { + "External id": 988007,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521694.557, "dur": 0.251, + "args": { + "External id": 988008,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521695.739, "dur": 0.253, + "args": { + "External id": 988009,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521696.850, "dur": 0.249, + "args": { + "External id": 988010,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521698.046, "dur": 0.250, + "args": { + "External id": 988011,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521699.330, "dur": 0.255, + "args": { + "External id": 988012,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521700.496, "dur": 0.243, + "args": { + "External id": 988013,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521703.919, "dur": 0.250, + "args": { + "External id": 988014,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521705.103, "dur": 0.244, + "args": { + "External id": 988015,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521707.029, "dur": 0.271, + "args": { + "External id": 988016,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521708.269, "dur": 0.253, + "args": { + "External id": 988017,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521709.410, "dur": 0.247, + "args": { + "External id": 988018,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521710.536, "dur": 0.242, + "args": { + "External id": 988019,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521711.709, "dur": 0.432, + "args": { + "External id": 988020,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521713.036, "dur": 0.361, + "args": { + "External id": 988021,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521716.374, "dur": 0.383, + "args": { + "External id": 988022,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521717.736, "dur": 0.346, + "args": { + "External id": 988023,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521719.010, "dur": 0.433, + "args": { + "External id": 988024,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521720.508, "dur": 0.260, + "args": { + "External id": 988025,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521721.662, "dur": 0.372, + "args": { + "External id": 988026,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521723.049, "dur": 0.397, + "args": { + "External id": 988027,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521724.323, "dur": 0.359, + "args": { + "External id": 988028,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521725.629, "dur": 0.240, + "args": { + "External id": 988029,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521728.853, "dur": 0.250, + "args": { + "External id": 988030,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521730.030, "dur": 0.233, + "args": { + "External id": 988031,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521731.269, "dur": 0.268, + "args": { + "External id": 988032,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521732.531, "dur": 0.251, + "args": { + "External id": 988033,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521733.660, "dur": 0.247, + "args": { + "External id": 988034,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521734.783, "dur": 0.247, + "args": { + "External id": 988035,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521736.565, "dur": 0.252, + "args": { + "External id": 988036,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521737.894, "dur": 0.242, + "args": { + "External id": 988037,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521741.376, "dur": 0.255, + "args": { + "External id": 988038,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521742.545, "dur": 0.246, + "args": { + "External id": 988039,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521743.995, "dur": 0.248, + "args": { + "External id": 988040,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521745.192, "dur": 0.247, + "args": { + "External id": 988041,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521746.477, "dur": 0.253, + "args": { + "External id": 988042,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521749.793, "dur": 0.272, + "args": { + "External id": 988043,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521751.062, "dur": 0.250, + "args": { + "External id": 988044,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521752.206, "dur": 0.244, + "args": { + "External id": 988045,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521755.615, "dur": 0.255, + "args": { + "External id": 988046,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521756.762, "dur": 0.243, + "args": { + "External id": 988047,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521757.927, "dur": 0.249, + "args": { + "External id": 988048,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521759.295, "dur": 0.245, + "args": { + "External id": 988049,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521760.395, "dur": 0.362, + "args": { + "External id": 988050,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521761.666, "dur": 0.370, + "args": { + "External id": 988051,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521763.002, "dur": 0.368, + "args": { + "External id": 988052,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521764.300, "dur": 0.372, + "args": { + "External id": 988053,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521767.970, "dur": 0.415, + "args": { + "External id": 988054,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521769.257, "dur": 0.259, + "args": { + "External id": 988055,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521770.474, "dur": 0.351, + "args": { + "External id": 988056,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521771.787, "dur": 0.246, + "args": { + "External id": 988057,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521772.903, "dur": 0.252, + "args": { + "External id": 988058,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521774.040, "dur": 0.245, + "args": { + "External id": 988059,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521775.730, "dur": 0.262, + "args": { + "External id": 988060,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521777.083, "dur": 0.247, + "args": { + "External id": 988061,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521780.365, "dur": 0.257, + "args": { + "External id": 988062,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521781.846, "dur": 0.244, + "args": { + "External id": 988063,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521782.997, "dur": 0.252, + "args": { + "External id": 988064,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521784.223, "dur": 0.252, + "args": { + "External id": 988065,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521785.370, "dur": 0.251, + "args": { + "External id": 988066,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521786.536, "dur": 0.245, + "args": { + "External id": 988067,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521787.970, "dur": 0.255, + "args": { + "External id": 988068,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521789.310, "dur": 0.246, + "args": { + "External id": 988069,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521792.755, "dur": 0.258, + "args": { + "External id": 988070,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521794.057, "dur": 0.245, + "args": { + "External id": 988071,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521795.270, "dur": 0.251, + "args": { + "External id": 988072,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521796.495, "dur": 0.245, + "args": { + "External id": 988073,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521797.670, "dur": 0.252, + "args": { + "External id": 988074,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521798.930, "dur": 0.245, + "args": { + "External id": 988075,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521800.123, "dur": 0.252, + "args": { + "External id": 988076,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521805.688, "dur": 0.252, + "args": { + "External id": 988077,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521808.950, "dur": 0.275, + "args": { + "External id": 988078,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521810.327, "dur": 0.246, + "args": { + "External id": 988079,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521811.614, "dur": 0.269, + "args": { + "External id": 988080,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521813.101, "dur": 0.244, + "args": { + "External id": 988081,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521814.541, "dur": 0.249, + "args": { + "External id": 988082,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521815.798, "dur": 0.247, + "args": { + "External id": 988083,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521817.029, "dur": 0.260, + "args": { + "External id": 988084,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521818.168, "dur": 0.242, + "args": { + "External id": 988085,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521821.770, "dur": 0.255, + "args": { + "External id": 988086,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521822.920, "dur": 0.376, + "args": { + "External id": 988087,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521824.585, "dur": 0.414, + "args": { + "External id": 988088,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521826.090, "dur": 0.242, + "args": { + "External id": 988089,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521827.203, "dur": 0.249, + "args": { + "External id": 988090,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521828.383, "dur": 0.247, + "args": { + "External id": 988091,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521829.635, "dur": 0.259, + "args": { + "External id": 988092,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521830.806, "dur": 0.417, + "args": { + "External id": 988093,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521834.687, "dur": 0.378, + "args": { + "External id": 988094,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521835.958, "dur": 0.246, + "args": { + "External id": 988095,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521837.066, "dur": 0.255, + "args": { + "External id": 988096,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521838.195, "dur": 0.248, + "args": { + "External id": 988097,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521839.371, "dur": 0.256, + "args": { + "External id": 988098,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521840.583, "dur": 0.247, + "args": { + "External id": 988099,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 20999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521841.741, "dur": 0.257, + "args": { + "External id": 988100,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521842.954, "dur": 0.249, + "args": { + "External id": 988101,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521846.043, "dur": 0.256, + "args": { + "External id": 988102,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521847.183, "dur": 0.246, + "args": { + "External id": 988103,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521848.320, "dur": 0.250, + "args": { + "External id": 988104,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521849.462, "dur": 0.244, + "args": { + "External id": 988105,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521850.669, "dur": 0.258, + "args": { + "External id": 988106,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521851.847, "dur": 0.243, + "args": { + "External id": 988107,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521853.083, "dur": 0.252, + "args": { + "External id": 988108,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521854.292, "dur": 0.244, + "args": { + "External id": 988109,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521857.816, "dur": 0.254, + "args": { + "External id": 988110,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521859.027, "dur": 0.249, + "args": { + "External id": 988111,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521860.250, "dur": 0.250, + "args": { + "External id": 988112,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521861.503, "dur": 0.300, + "args": { + "External id": 988113,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521862.705, "dur": 0.252, + "args": { + "External id": 988114,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521863.970, "dur": 0.260, + "args": { + "External id": 988115,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521865.266, "dur": 0.257, + "args": { + "External id": 988116,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521866.595, "dur": 0.245, + "args": { + "External id": 988117,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521869.648, "dur": 0.257, + "args": { + "External id": 988118,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521870.871, "dur": 0.247, + "args": { + "External id": 988119,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521872.943, "dur": 0.250, + "args": { + "External id": 988120,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521874.202, "dur": 0.241, + "args": { + "External id": 988121,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521875.410, "dur": 0.255, + "args": { + "External id": 988122,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521876.714, "dur": 0.244, + "args": { + "External id": 988123,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521878.040, "dur": 0.258, + "args": { + "External id": 988124,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521879.289, "dur": 0.246, + "args": { + "External id": 988125,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521883.006, "dur": 0.261, + "args": { + "External id": 988126,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521884.185, "dur": 0.245, + "args": { + "External id": 988127,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521885.460, "dur": 0.273, + "args": { + "External id": 988128,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521886.727, "dur": 0.250, + "args": { + "External id": 988129,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521887.857, "dur": 0.288, + "args": { + "External id": 988130,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521889.150, "dur": 0.257, + "args": { + "External id": 988131,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521890.457, "dur": 0.256, + "args": { + "External id": 988132,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521891.597, "dur": 0.247, + "args": { + "External id": 988133,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521895.297, "dur": 0.256, + "args": { + "External id": 988134,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521896.459, "dur": 0.380, + "args": { + "External id": 988135,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521897.774, "dur": 0.389, + "args": { + "External id": 988136,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521899.223, "dur": 0.251, + "args": { + "External id": 988137,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521900.382, "dur": 0.377, + "args": { + "External id": 988138,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521901.721, "dur": 0.322, + "args": { + "External id": 988139,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521903.185, "dur": 0.261, + "args": { + "External id": 988140,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521904.351, "dur": 0.296, + "args": { + "External id": 988141,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521907.890, "dur": 0.406, + "args": { + "External id": 988142,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521909.213, "dur": 0.418, + "args": { + "External id": 988143,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521910.521, "dur": 0.389, + "args": { + "External id": 988144,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521911.830, "dur": 0.247, + "args": { + "External id": 988145,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521913.018, "dur": 0.356, + "args": { + "External id": 988146,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521914.349, "dur": 0.245, + "args": { + "External id": 988147,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521915.630, "dur": 0.251, + "args": { + "External id": 988148,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521916.943, "dur": 0.246, + "args": { + "External id": 988149,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521919.840, "dur": 0.251, + "args": { + "External id": 988150,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521921.122, "dur": 0.248, + "args": { + "External id": 988151,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521922.451, "dur": 0.249, + "args": { + "External id": 988152,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521923.591, "dur": 0.246, + "args": { + "External id": 988153,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521924.758, "dur": 0.257, + "args": { + "External id": 988154,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521925.938, "dur": 0.252, + "args": { + "External id": 988155,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521927.638, "dur": 0.252, + "args": { + "External id": 988156,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521930.550, "dur": 0.267, + "args": { + "External id": 988157,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521934.377, "dur": 0.254, + "args": { + "External id": 988158,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942521935.548, "dur": 0.253, + "args": { + "External id": 988159,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338708, "tid": 2338708, + "ts": 6345942521999.836, "dur": 1735.749, + "args": { + "External id": 988160,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2338708, "tid": 2338708, + "ts": 6345942522479.307, "dur": 1158.085, + "args": { + "External id": 994305,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522488.955, "dur": 11.621, + "args": { + "External id": 994306,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522495.803, "dur": 3.800, + "args": { + "External id": 994307,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522501.193, "dur": 1.977, + "args": { + "External id": 994308,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522502.284, "dur": 0.788, + "args": { + "External id": 994309,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522503.632, "dur": 4.671, + "args": { + "External id": 994310,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522505.618, "dur": 2.494, + "args": { + "External id": 994311,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522508.671, "dur": 1.719, + "args": { + "External id": 994312,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522509.187, "dur": 1.095, + "args": { + "External id": 994313,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522510.718, "dur": 2.454, + "args": { + "External id": 994314,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522512.408, "dur": 0.677, + "args": { + "External id": 994315,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522513.461, "dur": 3.451, + "args": { + "External id": 994316,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522516.198, "dur": 0.637, + "args": { + "External id": 994317,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522519.500, "dur": 1.093, + "args": { + "External id": 994318,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522519.948, "dur": 0.562, + "args": { + "External id": 994319,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522520.930, "dur": 3.002, + "args": { + "External id": 994320,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522523.278, "dur": 0.573, + "args": { + "External id": 994321,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522524.212, "dur": 3.318, + "args": { + "External id": 994322,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522526.828, "dur": 0.628, + "args": { + "External id": 994323,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522527.800, "dur": 1.455, + "args": { + "External id": 994324,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522528.251, "dur": 0.926, + "args": { + "External id": 994325,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522529.559, "dur": 4.719, + "args": { + "External id": 994326,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522531.278, "dur": 2.691, + "args": { + "External id": 994327,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522534.605, "dur": 1.078, + "args": { + "External id": 994328,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522535.051, "dur": 0.555, + "args": { + "External id": 994329,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522535.972, "dur": 3.198, + "args": { + "External id": 994330,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522538.326, "dur": 0.761, + "args": { + "External id": 994331,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522539.442, "dur": 3.062, + "args": { + "External id": 994332,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522541.820, "dur": 0.603, + "args": { + "External id": 994333,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522545.103, "dur": 1.436, + "args": { + "External id": 994334,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522545.593, "dur": 0.861, + "args": { + "External id": 994335,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522546.886, "dur": 3.021, + "args": { + "External id": 994336,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522548.807, "dur": 1.020, + "args": { + "External id": 994337,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522550.240, "dur": 3.212, + "args": { + "External id": 994338,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522552.708, "dur": 0.658, + "args": { + "External id": 994339,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522553.819, "dur": 1.351, + "args": { + "External id": 994340,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522554.265, "dur": 0.822, + "args": { + "External id": 994341,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522555.519, "dur": 4.132, + "args": { + "External id": 994342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522557.417, "dur": 2.141, + "args": { + "External id": 994343,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522559.977, "dur": 1.176, + "args": { + "External id": 994344,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522560.491, "dur": 0.586, + "args": { + "External id": 994345,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522561.450, "dur": 2.698, + "args": { + "External id": 994346,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522563.064, "dur": 1.001, + "args": { + "External id": 994347,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522564.420, "dur": 3.650, + "args": { + "External id": 994348,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522567.062, "dur": 0.924, + "args": { + "External id": 994349,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522570.666, "dur": 1.253, + "args": { + "External id": 994350,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522571.180, "dur": 0.656, + "args": { + "External id": 994351,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522572.303, "dur": 3.603, + "args": { + "External id": 994352,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522574.856, "dur": 0.968, + "args": { + "External id": 994353,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522576.247, "dur": 3.177, + "args": { + "External id": 994354,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522578.619, "dur": 0.726, + "args": { + "External id": 994355,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522579.922, "dur": 1.165, + "args": { + "External id": 994356,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522580.366, "dur": 0.645, + "args": { + "External id": 994357,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522581.442, "dur": 4.978, + "args": { + "External id": 994358,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522583.944, "dur": 2.378, + "args": { + "External id": 994359,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522586.734, "dur": 1.316, + "args": { + "External id": 994360,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522587.182, "dur": 0.790, + "args": { + "External id": 994361,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522588.324, "dur": 3.611, + "args": { + "External id": 994362,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522590.910, "dur": 0.945, + "args": { + "External id": 994363,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522592.212, "dur": 3.620, + "args": { + "External id": 994364,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522594.919, "dur": 0.826, + "args": { + "External id": 994365,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522598.239, "dur": 1.298, + "args": { + "External id": 994366,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522598.765, "dur": 0.687, + "args": { + "External id": 994367,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522599.812, "dur": 3.300, + "args": { + "External id": 994368,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522602.329, "dur": 0.701, + "args": { + "External id": 994369,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522603.432, "dur": 3.013, + "args": { + "External id": 994370,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522605.619, "dur": 0.738, + "args": { + "External id": 994371,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522606.914, "dur": 1.026, + "args": { + "External id": 994372,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522607.391, "dur": 0.466, + "args": { + "External id": 994373,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522608.325, "dur": 4.923, + "args": { + "External id": 994374,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522610.820, "dur": 2.337, + "args": { + "External id": 994375,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522613.526, "dur": 1.185, + "args": { + "External id": 994376,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522613.974, "dur": 0.658, + "args": { + "External id": 994377,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522615.115, "dur": 3.276, + "args": { + "External id": 994378,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522617.302, "dur": 1.004, + "args": { + "External id": 994379,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522618.670, "dur": 3.278, + "args": { + "External id": 994380,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522620.864, "dur": 0.995, + "args": { + "External id": 994381,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522624.244, "dur": 1.324, + "args": { + "External id": 994382,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522624.732, "dur": 0.755, + "args": { + "External id": 994383,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522625.842, "dur": 2.671, + "args": { + "External id": 994384,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522627.423, "dur": 1.009, + "args": { + "External id": 994385,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522628.866, "dur": 3.640, + "args": { + "External id": 994386,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522631.855, "dur": 0.576, + "args": { + "External id": 994387,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522632.866, "dur": 1.468, + "args": { + "External id": 994388,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522633.326, "dur": 0.926, + "args": { + "External id": 994389,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522634.614, "dur": 4.637, + "args": { + "External id": 994390,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522636.535, "dur": 2.429, + "args": { + "External id": 994391,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522639.534, "dur": 1.185, + "args": { + "External id": 994392,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522639.994, "dur": 0.644, + "args": { + "External id": 994393,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522641.003, "dur": 2.841, + "args": { + "External id": 994394,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522642.805, "dur": 0.956, + "args": { + "External id": 994395,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522644.126, "dur": 2.800, + "args": { + "External id": 994396,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522646.182, "dur": 0.667, + "args": { + "External id": 994397,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522649.534, "dur": 1.174, + "args": { + "External id": 994398,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522650.017, "dur": 0.610, + "args": { + "External id": 994399,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522650.984, "dur": 2.222, + "args": { + "External id": 994400,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522652.499, "dur": 0.626, + "args": { + "External id": 994401,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522653.507, "dur": 3.347, + "args": { + "External id": 994402,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522656.026, "dur": 0.749, + "args": { + "External id": 994403,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522657.224, "dur": 1.007, + "args": { + "External id": 994404,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522657.675, "dur": 0.480, + "args": { + "External id": 994405,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522658.507, "dur": 4.710, + "args": { + "External id": 994406,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522660.923, "dur": 2.186, + "args": { + "External id": 994407,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522663.532, "dur": 1.494, + "args": { + "External id": 994408,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522664.017, "dur": 0.929, + "args": { + "External id": 994409,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522665.302, "dur": 2.631, + "args": { + "External id": 994410,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522666.996, "dur": 0.854, + "args": { + "External id": 994411,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522668.212, "dur": 3.321, + "args": { + "External id": 994412,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522670.454, "dur": 0.996, + "args": { + "External id": 994413,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522674.086, "dur": 1.714, + "args": { + "External id": 994414,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522674.582, "dur": 1.134, + "args": { + "External id": 994415,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522676.075, "dur": 4.444, + "args": { + "External id": 994416,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522678.581, "dur": 1.851, + "args": { + "External id": 994417,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522680.797, "dur": 1.217, + "args": { + "External id": 994418,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522681.274, "dur": 0.660, + "args": { + "External id": 994419,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522682.286, "dur": 1.452, + "args": { + "External id": 994420,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522682.740, "dur": 0.918, + "args": { + "External id": 994421,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522684.032, "dur": 4.401, + "args": { + "External id": 994422,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522687.539, "dur": 0.813, + "args": { + "External id": 994423,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522688.766, "dur": 1.355, + "args": { + "External id": 994424,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522689.216, "dur": 0.831, + "args": { + "External id": 994425,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522690.396, "dur": 3.001, + "args": { + "External id": 994426,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522692.778, "dur": 0.481, + "args": { + "External id": 994427,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522693.678, "dur": 2.886, + "args": { + "External id": 994428,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522695.743, "dur": 0.723, + "args": { + "External id": 994429,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522699.005, "dur": 1.134, + "args": { + "External id": 994430,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522699.466, "dur": 0.600, + "args": { + "External id": 994431,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522700.517, "dur": 4.066, + "args": { + "External id": 994432,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522702.375, "dur": 2.104, + "args": { + "External id": 994433,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522704.865, "dur": 1.358, + "args": { + "External id": 994434,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522705.426, "dur": 0.717, + "args": { + "External id": 994435,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522706.498, "dur": 1.322, + "args": { + "External id": 994436,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522706.947, "dur": 0.792, + "args": { + "External id": 994437,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522708.211, "dur": 4.197, + "args": { + "External id": 994438,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522711.457, "dur": 0.874, + "args": { + "External id": 994439,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522712.760, "dur": 1.268, + "args": { + "External id": 994440,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522713.250, "dur": 0.703, + "args": { + "External id": 994441,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522714.313, "dur": 2.756, + "args": { + "External id": 994442,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522716.334, "dur": 0.652, + "args": { + "External id": 994443,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522717.347, "dur": 3.343, + "args": { + "External id": 994444,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522719.836, "dur": 0.776, + "args": { + "External id": 994445,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522723.358, "dur": 1.100, + "args": { + "External id": 994446,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522723.848, "dur": 0.526, + "args": { + "External id": 994447,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522724.916, "dur": 3.942, + "args": { + "External id": 994448,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522726.990, "dur": 1.776, + "args": { + "External id": 994449,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522729.284, "dur": 1.323, + "args": { + "External id": 994450,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522729.731, "dur": 0.795, + "args": { + "External id": 994451,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522730.907, "dur": 1.381, + "args": { + "External id": 994452,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522731.374, "dur": 0.835, + "args": { + "External id": 994453,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522732.602, "dur": 5.059, + "args": { + "External id": 994454,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522736.900, "dur": 0.684, + "args": { + "External id": 994455,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522737.984, "dur": 1.103, + "args": { + "External id": 994456,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522738.433, "dur": 0.581, + "args": { + "External id": 994457,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522739.369, "dur": 2.633, + "args": { + "External id": 994458,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522741.305, "dur": 0.619, + "args": { + "External id": 994459,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522742.281, "dur": 2.465, + "args": { + "External id": 994460,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522743.959, "dur": 0.710, + "args": { + "External id": 994461,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522747.645, "dur": 1.387, + "args": { + "External id": 994462,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522748.144, "dur": 0.814, + "args": { + "External id": 994463,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522749.306, "dur": 3.701, + "args": { + "External id": 994464,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522751.093, "dur": 1.825, + "args": { + "External id": 994465,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522753.291, "dur": 1.198, + "args": { + "External id": 994466,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522753.760, "dur": 0.647, + "args": { + "External id": 994467,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522754.968, "dur": 1.364, + "args": { + "External id": 994468,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522755.437, "dur": 0.809, + "args": { + "External id": 994469,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522756.608, "dur": 4.538, + "args": { + "External id": 994470,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522760.423, "dur": 0.641, + "args": { + "External id": 994471,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522761.524, "dur": 1.308, + "args": { + "External id": 994472,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522762.001, "dur": 0.750, + "args": { + "External id": 994473,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522763.246, "dur": 3.087, + "args": { + "External id": 994474,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522765.604, "dur": 0.649, + "args": { + "External id": 994475,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522766.764, "dur": 2.524, + "args": { + "External id": 994476,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522768.516, "dur": 0.695, + "args": { + "External id": 994477,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522771.847, "dur": 1.424, + "args": { + "External id": 994478,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522772.340, "dur": 0.850, + "args": { + "External id": 994479,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522773.591, "dur": 3.695, + "args": { + "External id": 994480,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522775.455, "dur": 1.723, + "args": { + "External id": 994481,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522777.571, "dur": 1.421, + "args": { + "External id": 994482,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522778.023, "dur": 0.884, + "args": { + "External id": 994483,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522779.430, "dur": 1.328, + "args": { + "External id": 994484,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522779.881, "dur": 0.792, + "args": { + "External id": 994485,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522781.106, "dur": 4.188, + "args": { + "External id": 994486,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522784.651, "dur": 0.562, + "args": { + "External id": 994487,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522785.636, "dur": 2.319, + "args": { + "External id": 994488,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522787.309, "dur": 0.569, + "args": { + "External id": 994489,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522788.279, "dur": 2.091, + "args": { + "External id": 994490,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522789.422, "dur": 0.859, + "args": { + "External id": 994491,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522790.678, "dur": 3.524, + "args": { + "External id": 994492,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522793.085, "dur": 1.039, + "args": { + "External id": 994493,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522796.770, "dur": 1.963, + "args": { + "External id": 994494,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522797.996, "dur": 0.658, + "args": { + "External id": 994495,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522799.054, "dur": 3.105, + "args": { + "External id": 994496,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522800.313, "dur": 1.759, + "args": { + "External id": 994497,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522802.461, "dur": 1.918, + "args": { + "External id": 994498,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522803.591, "dur": 0.713, + "args": { + "External id": 994499,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522804.677, "dur": 1.897, + "args": { + "External id": 994500,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522805.478, "dur": 1.014, + "args": { + "External id": 994501,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522806.873, "dur": 4.148, + "args": { + "External id": 994502,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522810.086, "dur": 0.856, + "args": { + "External id": 994503,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522811.351, "dur": 1.468, + "args": { + "External id": 994504,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522812.106, "dur": 0.630, + "args": { + "External id": 994505,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522813.118, "dur": 2.373, + "args": { + "External id": 994506,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522814.406, "dur": 1.004, + "args": { + "External id": 994507,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522815.889, "dur": 3.469, + "args": { + "External id": 994508,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522818.724, "dur": 0.552, + "args": { + "External id": 994509,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522821.996, "dur": 2.236, + "args": { + "External id": 994510,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522823.404, "dur": 0.750, + "args": { + "External id": 994511,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522824.541, "dur": 3.497, + "args": { + "External id": 994512,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522825.743, "dur": 1.988, + "args": { + "External id": 994513,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522828.371, "dur": 2.074, + "args": { + "External id": 994514,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522829.812, "dur": 0.546, + "args": { + "External id": 994515,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522830.744, "dur": 1.734, + "args": { + "External id": 994516,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522831.647, "dur": 0.754, + "args": { + "External id": 994517,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522832.791, "dur": 4.152, + "args": { + "External id": 994518,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522836.303, "dur": 0.561, + "args": { + "External id": 994519,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522837.327, "dur": 1.694, + "args": { + "External id": 994520,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522838.154, "dur": 0.786, + "args": { + "External id": 994521,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522839.319, "dur": 1.979, + "args": { + "External id": 994522,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522840.453, "dur": 0.765, + "args": { + "External id": 994523,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522841.595, "dur": 2.952, + "args": { + "External id": 994524,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522843.902, "dur": 0.554, + "args": { + "External id": 994525,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522847.298, "dur": 2.316, + "args": { + "External id": 994526,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522848.773, "dur": 0.761, + "args": { + "External id": 994527,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522849.952, "dur": 2.812, + "args": { + "External id": 994528,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522850.720, "dur": 1.938, + "args": { + "External id": 994529,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522853.086, "dur": 1.846, + "args": { + "External id": 994530,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522854.256, "dur": 0.597, + "args": { + "External id": 994531,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522855.509, "dur": 1.936, + "args": { + "External id": 994532,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522856.282, "dur": 1.077, + "args": { + "External id": 994533,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522857.746, "dur": 4.120, + "args": { + "External id": 994534,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522860.951, "dur": 0.828, + "args": { + "External id": 994535,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522862.207, "dur": 1.739, + "args": { + "External id": 994536,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522863.154, "dur": 0.704, + "args": { + "External id": 994537,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522864.255, "dur": 2.231, + "args": { + "External id": 994538,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522865.571, "dur": 0.835, + "args": { + "External id": 994539,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522866.822, "dur": 3.034, + "args": { + "External id": 994540,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522868.877, "dur": 0.896, + "args": { + "External id": 994541,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522872.470, "dur": 2.259, + "args": { + "External id": 994542,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522873.953, "dur": 0.694, + "args": { + "External id": 994543,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522875.027, "dur": 2.626, + "args": { + "External id": 994544,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522875.968, "dur": 1.570, + "args": { + "External id": 994545,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522877.990, "dur": 2.349, + "args": { + "External id": 994546,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522879.419, "dur": 0.832, + "args": { + "External id": 994547,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522880.656, "dur": 1.599, + "args": { + "External id": 994548,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522881.433, "dur": 0.740, + "args": { + "External id": 994549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522882.564, "dur": 4.263, + "args": { + "External id": 994550,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522886.126, "dur": 0.618, + "args": { + "External id": 994551,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522887.155, "dur": 1.780, + "args": { + "External id": 994552,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522888.171, "dur": 0.675, + "args": { + "External id": 994553,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522889.251, "dur": 2.322, + "args": { + "External id": 994554,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522890.563, "dur": 0.930, + "args": { + "External id": 994555,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522891.892, "dur": 3.648, + "args": { + "External id": 994556,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522894.771, "dur": 0.690, + "args": { + "External id": 994557,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522898.102, "dur": 2.458, + "args": { + "External id": 994558,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522899.766, "dur": 0.711, + "args": { + "External id": 994559,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522900.863, "dur": 2.603, + "args": { + "External id": 994560,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522901.571, "dur": 1.808, + "args": { + "External id": 994561,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522903.807, "dur": 1.761, + "args": { + "External id": 994562,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522904.942, "dur": 0.546, + "args": { + "External id": 994563,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522905.864, "dur": 1.636, + "args": { + "External id": 994564,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522906.729, "dur": 0.693, + "args": { + "External id": 994565,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522907.814, "dur": 3.546, + "args": { + "External id": 994566,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522910.684, "dur": 0.592, + "args": { + "External id": 994567,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522911.710, "dur": 1.956, + "args": { + "External id": 994568,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522912.993, "dur": 0.591, + "args": { + "External id": 994569,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522913.987, "dur": 1.974, + "args": { + "External id": 994570,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522915.082, "dur": 0.799, + "args": { + "External id": 994571,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522916.273, "dur": 3.027, + "args": { + "External id": 994572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522918.620, "dur": 0.580, + "args": { + "External id": 994573,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522922.244, "dur": 1.934, + "args": { + "External id": 994574,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522923.438, "dur": 0.653, + "args": { + "External id": 994575,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522924.493, "dur": 3.438, + "args": { + "External id": 994576,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522925.495, "dur": 2.350, + "args": { + "External id": 994577,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522928.277, "dur": 1.831, + "args": { + "External id": 994578,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522929.501, "dur": 0.527, + "args": { + "External id": 994579,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522930.514, "dur": 2.153, + "args": { + "External id": 994580,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522931.362, "dur": 1.225, + "args": { + "External id": 994581,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522932.970, "dur": 3.959, + "args": { + "External id": 994582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522936.217, "dur": 0.639, + "args": { + "External id": 994583,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522937.252, "dur": 1.441, + "args": { + "External id": 994584,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522937.997, "dur": 0.618, + "args": { + "External id": 994585,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522939.138, "dur": 2.602, + "args": { + "External id": 994586,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522940.756, "dur": 0.906, + "args": { + "External id": 994587,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522942.045, "dur": 3.453, + "args": { + "External id": 994588,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522944.660, "dur": 0.760, + "args": { + "External id": 994589,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522948.111, "dur": 2.275, + "args": { + "External id": 994590,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522949.714, "dur": 0.596, + "args": { + "External id": 994591,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522950.726, "dur": 2.621, + "args": { + "External id": 994592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522951.540, "dur": 1.513, + "args": { + "External id": 994593,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522953.671, "dur": 2.407, + "args": { + "External id": 994594,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522955.130, "dur": 0.866, + "args": { + "External id": 994595,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522956.589, "dur": 2.053, + "args": { + "External id": 994596,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522957.471, "dur": 1.089, + "args": { + "External id": 994597,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522958.952, "dur": 5.311, + "args": { + "External id": 994598,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522963.502, "dur": 0.681, + "args": { + "External id": 994599,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522964.592, "dur": 1.664, + "args": { + "External id": 994600,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522965.573, "dur": 0.603, + "args": { + "External id": 994601,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522966.620, "dur": 2.545, + "args": { + "External id": 994602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522968.269, "dur": 0.817, + "args": { + "External id": 994603,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522969.558, "dur": 3.485, + "args": { + "External id": 994604,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522972.164, "dur": 0.797, + "args": { + "External id": 994605,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522975.354, "dur": 2.206, + "args": { + "External id": 994606,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522976.887, "dur": 0.590, + "args": { + "External id": 994607,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522977.858, "dur": 3.077, + "args": { + "External id": 994608,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522978.689, "dur": 2.149, + "args": { + "External id": 994609,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522981.302, "dur": 1.876, + "args": { + "External id": 994610,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522982.548, "dur": 0.545, + "args": { + "External id": 994611,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522983.513, "dur": 2.004, + "args": { + "External id": 994612,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522984.701, "dur": 0.737, + "args": { + "External id": 994613,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522986.008, "dur": 4.081, + "args": { + "External id": 994614,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522989.182, "dur": 0.825, + "args": { + "External id": 994615,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522990.439, "dur": 1.572, + "args": { + "External id": 994616,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522991.252, "dur": 0.681, + "args": { + "External id": 994617,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522992.311, "dur": 2.507, + "args": { + "External id": 994618,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522993.953, "dur": 0.789, + "args": { + "External id": 994619,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942522995.173, "dur": 2.770, + "args": { + "External id": 994620,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942522997.208, "dur": 0.645, + "args": { + "External id": 994621,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523000.464, "dur": 2.240, + "args": { + "External id": 994622,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523002.073, "dur": 0.555, + "args": { + "External id": 994623,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523003.060, "dur": 2.566, + "args": { + "External id": 994624,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523004.000, "dur": 1.544, + "args": { + "External id": 994625,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523005.946, "dur": 16.767, + "args": { + "External id": 994626,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523020.773, "dur": 1.405, + "args": { + "External id": 994627,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523023.731, "dur": 2.118, + "args": { + "External id": 994628,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523025.144, "dur": 0.621, + "args": { + "External id": 994629,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523026.164, "dur": 3.725, + "args": { + "External id": 994630,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523029.171, "dur": 0.632, + "args": { + "External id": 994631,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523030.242, "dur": 1.778, + "args": { + "External id": 994632,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523031.219, "dur": 0.720, + "args": { + "External id": 994633,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523032.351, "dur": 2.635, + "args": { + "External id": 994634,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523033.991, "dur": 0.920, + "args": { + "External id": 994635,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523035.493, "dur": 3.427, + "args": { + "External id": 994636,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523038.192, "dur": 0.645, + "args": { + "External id": 994637,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523041.303, "dur": 1.767, + "args": { + "External id": 994638,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523042.346, "dur": 0.643, + "args": { + "External id": 994639,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523043.374, "dur": 3.574, + "args": { + "External id": 994640,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523044.535, "dur": 2.315, + "args": { + "External id": 994641,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523047.356, "dur": 2.135, + "args": { + "External id": 994642,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523048.484, "dur": 0.931, + "args": { + "External id": 994643,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523049.931, "dur": 2.364, + "args": { + "External id": 994644,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523051.499, "dur": 0.719, + "args": { + "External id": 994645,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523084.118, "dur": 6.406, + "args": { + "External id": 994646,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523089.093, "dur": 1.114, + "args": { + "External id": 994647,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523090.970, "dur": 2.172, + "args": { + "External id": 994648,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523091.935, "dur": 1.128, + "args": { + "External id": 994649,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523093.524, "dur": 2.134, + "args": { + "External id": 994650,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523094.798, "dur": 0.770, + "args": { + "External id": 994651,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523096.018, "dur": 3.046, + "args": { + "External id": 994652,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523098.447, "dur": 0.494, + "args": { + "External id": 994653,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523101.718, "dur": 2.144, + "args": { + "External id": 994654,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523103.271, "dur": 0.512, + "args": { + "External id": 994655,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523104.323, "dur": 3.053, + "args": { + "External id": 994656,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523105.239, "dur": 1.855, + "args": { + "External id": 994657,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523107.798, "dur": 2.039, + "args": { + "External id": 994658,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523109.181, "dur": 0.580, + "args": { + "External id": 994659,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523110.303, "dur": 2.091, + "args": { + "External id": 994660,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523111.126, "dur": 1.175, + "args": { + "External id": 994661,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523112.991, "dur": 4.516, + "args": { + "External id": 994662,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523116.538, "dur": 0.877, + "args": { + "External id": 994663,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523117.942, "dur": 1.916, + "args": { + "External id": 994664,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523118.826, "dur": 0.954, + "args": { + "External id": 994665,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523120.378, "dur": 2.394, + "args": { + "External id": 994666,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523121.803, "dur": 0.881, + "args": { + "External id": 994667,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523123.089, "dur": 3.360, + "args": { + "External id": 994668,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523125.522, "dur": 0.852, + "args": { + "External id": 994669,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523129.134, "dur": 2.060, + "args": { + "External id": 994670,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523130.030, "dur": 1.065, + "args": { + "External id": 994671,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523131.531, "dur": 5.075, + "args": { + "External id": 994672,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523133.985, "dur": 2.470, + "args": { + "External id": 994673,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523136.974, "dur": 2.027, + "args": { + "External id": 994674,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523138.323, "dur": 0.577, + "args": { + "External id": 994675,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523139.351, "dur": 2.182, + "args": { + "External id": 994676,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523140.571, "dur": 0.876, + "args": { + "External id": 994677,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523141.854, "dur": 4.404, + "args": { + "External id": 994678,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523145.336, "dur": 0.845, + "args": { + "External id": 994679,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523146.602, "dur": 1.584, + "args": { + "External id": 994680,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523147.464, "dur": 0.643, + "args": { + "External id": 994681,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523148.551, "dur": 2.490, + "args": { + "External id": 994682,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523150.141, "dur": 0.810, + "args": { + "External id": 994683,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523151.361, "dur": 3.057, + "args": { + "External id": 994684,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523153.663, "dur": 0.678, + "args": { + "External id": 994685,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523157.179, "dur": 2.461, + "args": { + "External id": 994686,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523158.647, "dur": 0.909, + "args": { + "External id": 994687,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523159.954, "dur": 2.514, + "args": { + "External id": 994688,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523160.866, "dur": 1.513, + "args": { + "External id": 994689,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523162.838, "dur": 2.129, + "args": { + "External id": 994690,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523164.224, "dur": 0.663, + "args": { + "External id": 994691,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523165.314, "dur": 2.059, + "args": { + "External id": 994692,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523166.613, "dur": 0.671, + "args": { + "External id": 994693,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523167.752, "dur": 3.876, + "args": { + "External id": 994694,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523170.954, "dur": 0.597, + "args": { + "External id": 994695,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523172.007, "dur": 1.720, + "args": { + "External id": 994696,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523173.052, "dur": 0.593, + "args": { + "External id": 994697,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523174.087, "dur": 2.145, + "args": { + "External id": 994698,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523175.329, "dur": 0.814, + "args": { + "External id": 994699,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523176.632, "dur": 3.266, + "args": { + "External id": 994700,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523179.032, "dur": 0.790, + "args": { + "External id": 994701,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523182.970, "dur": 1.826, + "args": { + "External id": 994702,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523184.061, "dur": 0.646, + "args": { + "External id": 994703,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523185.358, "dur": 3.287, + "args": { + "External id": 994704,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523186.466, "dur": 2.080, + "args": { + "External id": 994705,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523188.976, "dur": 2.267, + "args": { + "External id": 994706,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523190.502, "dur": 0.659, + "args": { + "External id": 994707,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523191.587, "dur": 1.693, + "args": { + "External id": 994708,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523192.381, "dur": 0.813, + "args": { + "External id": 994709,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523193.690, "dur": 3.995, + "args": { + "External id": 994710,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523196.711, "dur": 0.893, + "args": { + "External id": 994711,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523198.028, "dur": 1.628, + "args": { + "External id": 994712,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523198.834, "dur": 0.742, + "args": { + "External id": 994713,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523200.057, "dur": 2.620, + "args": { + "External id": 994714,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523201.534, "dur": 1.053, + "args": { + "External id": 994715,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523202.997, "dur": 3.560, + "args": { + "External id": 994716,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523205.348, "dur": 1.125, + "args": { + "External id": 994717,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523209.874, "dur": 2.130, + "args": { + "External id": 994718,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523211.012, "dur": 0.912, + "args": { + "External id": 994719,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523212.338, "dur": 2.435, + "args": { + "External id": 994720,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523213.241, "dur": 1.252, + "args": { + "External id": 994721,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523215.107, "dur": 2.503, + "args": { + "External id": 994722,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523216.636, "dur": 0.894, + "args": { + "External id": 994723,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523218.169, "dur": 2.284, + "args": { + "External id": 994724,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523219.344, "dur": 1.023, + "args": { + "External id": 994725,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523220.780, "dur": 3.607, + "args": { + "External id": 994726,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523223.596, "dur": 0.716, + "args": { + "External id": 994727,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523224.729, "dur": 1.873, + "args": { + "External id": 994728,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523225.878, "dur": 0.649, + "args": { + "External id": 994729,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523226.922, "dur": 1.976, + "args": { + "External id": 994730,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523228.114, "dur": 0.695, + "args": { + "External id": 994731,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523229.410, "dur": 2.908, + "args": { + "External id": 994732,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523231.227, "dur": 1.012, + "args": { + "External id": 994733,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523234.938, "dur": 2.293, + "args": { + "External id": 994734,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523236.339, "dur": 0.812, + "args": { + "External id": 994735,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523237.646, "dur": 2.436, + "args": { + "External id": 994736,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523238.439, "dur": 1.463, + "args": { + "External id": 994737,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523240.405, "dur": 1.949, + "args": { + "External id": 994738,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523241.691, "dur": 0.584, + "args": { + "External id": 994739,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523242.826, "dur": 2.832, + "args": { + "External id": 994740,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523243.946, "dur": 1.624, + "args": { + "External id": 994741,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523245.983, "dur": 4.059, + "args": { + "External id": 994742,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523249.164, "dur": 0.794, + "args": { + "External id": 994743,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523250.421, "dur": 1.636, + "args": { + "External id": 994744,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523251.412, "dur": 0.568, + "args": { + "External id": 994745,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523252.562, "dur": 2.602, + "args": { + "External id": 994746,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523254.161, "dur": 0.917, + "args": { + "External id": 994747,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523255.658, "dur": 2.779, + "args": { + "External id": 994748,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523257.658, "dur": 0.700, + "args": { + "External id": 994749,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523261.031, "dur": 1.970, + "args": { + "External id": 994750,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523262.332, "dur": 0.591, + "args": { + "External id": 994751,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523263.319, "dur": 2.915, + "args": { + "External id": 994752,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523264.094, "dur": 1.853, + "args": { + "External id": 994753,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523266.587, "dur": 1.893, + "args": { + "External id": 994754,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523267.681, "dur": 0.722, + "args": { + "External id": 994755,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523268.832, "dur": 2.216, + "args": { + "External id": 994756,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523269.996, "dur": 0.966, + "args": { + "External id": 994757,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523271.366, "dur": 3.783, + "args": { + "External id": 994758,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523274.275, "dur": 0.797, + "args": { + "External id": 994759,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523275.498, "dur": 2.066, + "args": { + "External id": 994760,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523276.820, "dur": 0.669, + "args": { + "External id": 994761,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523277.895, "dur": 2.322, + "args": { + "External id": 994762,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523279.440, "dur": 0.691, + "args": { + "External id": 994763,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523280.571, "dur": 3.352, + "args": { + "External id": 994764,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523283.208, "dur": 0.632, + "args": { + "External id": 994765,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523286.238, "dur": 6.056, + "args": { + "External id": 994766,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523291.152, "dur": 1.051, + "args": { + "External id": 994767,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523292.626, "dur": 3.198, + "args": { + "External id": 994768,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523293.714, "dur": 2.016, + "args": { + "External id": 994769,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523296.168, "dur": 2.256, + "args": { + "External id": 994770,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523297.605, "dur": 0.710, + "args": { + "External id": 994771,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523298.800, "dur": 1.664, + "args": { + "External id": 994772,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523299.534, "dur": 0.844, + "args": { + "External id": 994773,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523300.812, "dur": 3.866, + "args": { + "External id": 994774,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523303.955, "dur": 0.643, + "args": { + "External id": 994775,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523305.031, "dur": 1.847, + "args": { + "External id": 994776,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523306.245, "dur": 0.546, + "args": { + "External id": 994777,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523307.399, "dur": 2.015, + "args": { + "External id": 994778,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523308.664, "dur": 0.662, + "args": { + "External id": 994779,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523309.821, "dur": 2.285, + "args": { + "External id": 994780,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523311.408, "dur": 0.619, + "args": { + "External id": 994781,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523314.682, "dur": 2.220, + "args": { + "External id": 994782,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523316.068, "dur": 0.753, + "args": { + "External id": 994783,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523317.275, "dur": 3.311, + "args": { + "External id": 994784,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523318.508, "dur": 1.984, + "args": { + "External id": 994785,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523320.922, "dur": 1.816, + "args": { + "External id": 994786,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523322.110, "dur": 0.554, + "args": { + "External id": 994787,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523323.095, "dur": 1.939, + "args": { + "External id": 994788,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523324.121, "dur": 0.825, + "args": { + "External id": 994789,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523325.359, "dur": 3.922, + "args": { + "External id": 994790,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523328.672, "dur": 0.527, + "args": { + "External id": 994791,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523329.644, "dur": 1.753, + "args": { + "External id": 994792,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523330.356, "dur": 0.966, + "args": { + "External id": 994793,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523331.821, "dur": 2.020, + "args": { + "External id": 994794,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523332.839, "dur": 0.912, + "args": { + "External id": 994795,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523334.155, "dur": 2.859, + "args": { + "External id": 994796,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523336.118, "dur": 0.817, + "args": { + "External id": 994797,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523339.715, "dur": 2.474, + "args": { + "External id": 994798,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523341.300, "dur": 0.808, + "args": { + "External id": 994799,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523342.537, "dur": 3.123, + "args": { + "External id": 994800,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523343.356, "dur": 2.198, + "args": { + "External id": 994801,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523346.043, "dur": 2.161, + "args": { + "External id": 994802,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523347.518, "dur": 0.604, + "args": { + "External id": 994803,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523348.638, "dur": 2.150, + "args": { + "External id": 994804,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523349.774, "dur": 0.934, + "args": { + "External id": 994805,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523351.348, "dur": 3.367, + "args": { + "External id": 994806,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523354.009, "dur": 0.626, + "args": { + "External id": 994807,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523355.165, "dur": 2.046, + "args": { + "External id": 994808,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523356.226, "dur": 0.906, + "args": { + "External id": 994809,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523357.581, "dur": 2.375, + "args": { + "External id": 994810,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523359.169, "dur": 0.703, + "args": { + "External id": 994811,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523360.291, "dur": 3.420, + "args": { + "External id": 994812,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523362.813, "dur": 0.822, + "args": { + "External id": 994813,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523366.291, "dur": 2.136, + "args": { + "External id": 994814,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523367.532, "dur": 0.814, + "args": { + "External id": 994815,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523368.790, "dur": 23.543, + "args": { + "External id": 994816,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523390.490, "dur": 1.534, + "args": { + "External id": 994817,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523392.669, "dur": 2.146, + "args": { + "External id": 994818,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523393.839, "dur": 0.889, + "args": { + "External id": 994819,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523395.158, "dur": 1.908, + "args": { + "External id": 994820,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523396.218, "dur": 0.754, + "args": { + "External id": 994821,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523397.386, "dur": 3.405, + "args": { + "External id": 994822,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523400.111, "dur": 0.591, + "args": { + "External id": 994823,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523401.175, "dur": 1.677, + "args": { + "External id": 994824,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523401.980, "dur": 0.792, + "args": { + "External id": 994825,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523403.225, "dur": 2.110, + "args": { + "External id": 994826,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523404.640, "dur": 0.608, + "args": { + "External id": 994827,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523405.675, "dur": 2.794, + "args": { + "External id": 994828,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523407.781, "dur": 0.611, + "args": { + "External id": 994829,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523411.220, "dur": 2.312, + "args": { + "External id": 994830,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523412.877, "dur": 0.581, + "args": { + "External id": 994831,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523413.884, "dur": 3.624, + "args": { + "External id": 994832,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523415.106, "dur": 2.295, + "args": { + "External id": 994833,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523417.833, "dur": 1.761, + "args": { + "External id": 994834,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523418.864, "dur": 0.653, + "args": { + "External id": 994835,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523419.930, "dur": 1.751, + "args": { + "External id": 994836,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523420.970, "dur": 0.625, + "args": { + "External id": 994837,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523422.042, "dur": 3.532, + "args": { + "External id": 994838,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523424.933, "dur": 0.565, + "args": { + "External id": 994839,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523425.919, "dur": 1.601, + "args": { + "External id": 994840,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523426.809, "dur": 0.632, + "args": { + "External id": 994841,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523427.875, "dur": 2.146, + "args": { + "External id": 994842,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523429.200, "dur": 0.738, + "args": { + "External id": 994843,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523430.621, "dur": 2.610, + "args": { + "External id": 994844,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523432.512, "dur": 0.639, + "args": { + "External id": 994845,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523435.935, "dur": 2.387, + "args": { + "External id": 994846,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523437.552, "dur": 0.689, + "args": { + "External id": 994847,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523438.677, "dur": 2.488, + "args": { + "External id": 994848,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523439.487, "dur": 1.590, + "args": { + "External id": 994849,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2338708, "tid": 2338708, + "ts": 6345942523441.578, "dur": 1.933, + "args": { + "External id": 994850,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523442.795, "dur": 0.636, + "args": { + "External id": 994851,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2338708, "tid": 2338708, + "ts": 6345942523466.932, "dur": 155.008, + "args": { + "External id": 994852,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2338708, "tid": 2338708, + "ts": 6345942523753.491, "dur": 165.568, + "args": { + "External id": 994853,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[273], [], [], [], []], "Ev Idx": 21609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2338708, "tid": 2338708, + "ts": 6345942523836.571, "dur": 59.025, + "args": { + "External id": 994854,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[273], [], [], [], []], "Ev Idx": 21610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2338708, "tid": 2338708, + "ts": 6345942523855.422, "dur": 1.231, + "args": { + "External id": 994855,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 21611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Redistribute", "pid": 2338708, "tid": 2338708, + "ts": 6345942524410.951, "dur": 1215.546, + "args": { + "External id": 994856,"Sequence number": 10552706, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "False"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338708, "tid": 2338708, + "ts": 6345942524485.212, "dur": 81.574, + "args": { + "External id": 994857,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942524492.491, "dur": 1.707, + "args": { + "External id": 994858,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942524496.857, "dur": 0.508, + "args": { + "External id": 994859,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::all_reduce", "pid": 2338708, "tid": 2338708, + "ts": 6345942524648.143, "dur": 576.386, + "args": { + "External id": 994860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["float", "", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2338708, "tid": 2338708, + "ts": 6345942524653.256, "dur": 60.088, + "args": { + "External id": 994861,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2338708, "tid": 2338708, + "ts": 6345942524659.812, "dur": 12.927, + "args": { + "External id": 994862,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "0"], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345942524665.610, "dur": 6.096, + "args": { + "External id": 994863,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2338708, "tid": 2338708, + "ts": 6345942524674.532, "dur": 38.133, + "args": { + "External id": 994864,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 2338708, "tid": 2338708, + "ts": 6345942524723.874, "dur": 495.485, + "args": { + "External id": 994865,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "-1"], "Input type": ["TensorList", "", "", "", "Scalar"], "Input Strides": [[[]], [], [], [], []], "Input Dims": [[[]], [], [], [], []], "Ev Idx": 21621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345942524765.786, "dur": 444.072, + "args": { + "External id": 994866,"Record function id": 0, "Collective name": "allreduce", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 2, "Input Strides": [[[]], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "2", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1, "Process Group Name": "0", "Input type": ["TensorList", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[[]], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 21622, "In msg nelems": 1 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 2338708, "tid": 2338708, + "ts": 6345942524781.350, "dur": 421.288, + "args": { + "External id": 994867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338708, "tid": 2338708, + "ts": 6345942525314.818, "dur": 266.781, + "args": { + "External id": 994868,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::wait_tensor", "pid": 2338708, "tid": 2338708, + "ts": 6345942525439.519, "dur": 43.913, + "args": { + "External id": 994869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2338708, "tid": 2338708, + "ts": 6345942525466.218, "dur": 6.075, + "args": { + "External id": 994870,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "2", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 21626, "In msg nelems": 0, "Rank": 2, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2338708, "tid": 2338708, + "ts": 6345942525518.885, "dur": 55.611, + "args": { + "External id": 994871,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942525522.379, "dur": 1.391, + "args": { + "External id": 994872,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942525525.319, "dur": 0.756, + "args": { + "External id": 994873,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_ToTorchTensor", "pid": 2338708, "tid": 2338708, + "ts": 6345942525647.997, "dur": 27.767, + "args": { + "External id": 994874,"Sequence number": 10552707, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2338708, "tid": 2338708, + "ts": 6345942525661.039, "dur": 10.449, + "args": { + "External id": 994875,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2338708, "tid": 2338708, + "ts": 6345942525664.167, "dur": 7.121, + "args": { + "External id": 994876,"Record function id": 0, "Concrete Inputs": ["", "[]"], "Input type": ["float", "ScalarList"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2338708, "tid": 2338708, + "ts": 6345942526177.548, "dur": 46.305, + "args": { + "External id": 994877,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "double", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reciprocal", "pid": 2338708, "tid": 2338708, + "ts": 6345942526236.734, "dur": 28.230, + "args": { + "External id": 994878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 2338708, "tid": 2338708, + "ts": 6345942526273.110, "dur": 30.563, + "args": { + "External id": 994879,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "double"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 2338708, "tid": 2338708, + "ts": 6345942526318.472, "dur": 32.683, + "args": { + "External id": 994880,"Record function id": 0, "Concrete Inputs": ["", "", "1."], "Input type": ["float", "", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 21636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942526324.359, "dur": 0.694, + "args": { + "External id": 994881,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 21637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2338708, "tid": 2338708, + "ts": 6345942526370.799, "dur": 0.676, + "args": { + "External id": 994882,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 21638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2338708, "tid": 2338708, + "ts": 6345942526520.391, "dur": 1236.526, + "args": { + "External id": 994883,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2338708, "tid": 2338708, + "ts": 6345942526992.242, "dur": 714.211, + "args": { + "External id": 994884,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isnan", "pid": 2338708, "tid": 2338708, + "ts": 6345942527819.360, "dur": 39.563, + "args": { + "External id": 994885,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2338708, "tid": 2338708, + "ts": 6345942527825.008, "dur": 33.294, + "args": { + "External id": 994886,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338708, "tid": 2338708, + "ts": 6345942527864.415, "dur": 7918.684, + "args": { + "External id": 994887,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2338708, + "ts": 6345942527866.716, "dur": 7915.867, + "args": { + "External id": 994888,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2338708, + "ts": 6345942527869.122, "dur": 7910.556, + "args": { + "External id": 994889,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isinf", "pid": 2338708, "tid": 2338708, + "ts": 6345942535798.337, "dur": 100.836, + "args": { + "External id": 994890,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345942535803.439, "dur": 60.723, + "args": { + "External id": 994891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2338708, "tid": 2338708, + "ts": 6345942535817.171, "dur": 5.451, + "args": { + "External id": 994892,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 21648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2338708, "tid": 2338708, + "ts": 6345942535824.613, "dur": 39.138, + "args": { + "External id": 994893,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], [1]], "Input Dims": [[], [0]], "Ev Idx": 21649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2338708, "tid": 2338708, + "ts": 6345942535835.207, "dur": 4.459, + "args": { + "External id": 994894,"Record function id": 0, "Concrete Inputs": ["", "[]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 21650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2338708, "tid": 2338708, + "ts": 6345942535866.421, "dur": 31.570, + "args": { + "External id": 994895,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2338708, "tid": 2338708, + "ts": 6345942535903.203, "dur": 45.505, + "args": { + "External id": 994896,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2338708, "tid": 2338708, + "ts": 6345942535904.457, "dur": 43.979, + "args": { + "External id": 994897,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2338708, "tid": 2338708, + "ts": 6345942535905.880, "dur": 42.153, + "args": { + "External id": 994898,"Sequence number": 10552708, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 21654 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#OptimizersContainer.step", "pid": 2338708, "tid": 2338708, + "ts": 6345942535992.018, "dur": 6757.421, + "args": { + "External id": 994899,"Record function id": 0, "Ev Idx": 21655 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#AdamW.step", "pid": 2338708, "tid": 2338708, + "ts": 6345942536083.073, "dur": 6635.192, + "args": { + "External id": 994900,"Record function id": 0, "Ev Idx": 21656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_add_", "pid": 2338708, "tid": 2338708, + "ts": 6345942537598.296, "dur": 270.010, + "args": { + "External id": 994901,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537623.678, "dur": 1.305, + "args": { + "External id": 994902,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537626.344, "dur": 0.184, + "args": { + "External id": 994903,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537627.045, "dur": 0.285, + "args": { + "External id": 994904,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537627.813, "dur": 0.118, + "args": { + "External id": 994905,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537628.446, "dur": 0.258, + "args": { + "External id": 994906,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537629.269, "dur": 0.336, + "args": { + "External id": 994907,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537630.377, "dur": 0.433, + "args": { + "External id": 994908,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537631.373, "dur": 0.122, + "args": { + "External id": 994909,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537631.952, "dur": 0.113, + "args": { + "External id": 994910,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537632.765, "dur": 0.089, + "args": { + "External id": 994911,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537633.515, "dur": 0.255, + "args": { + "External id": 994912,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537634.378, "dur": 0.235, + "args": { + "External id": 994913,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537635.085, "dur": 0.292, + "args": { + "External id": 994914,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537635.810, "dur": 0.327, + "args": { + "External id": 994915,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537637.174, "dur": 0.113, + "args": { + "External id": 994916,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537637.756, "dur": 0.119, + "args": { + "External id": 994917,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537638.356, "dur": 0.323, + "args": { + "External id": 994918,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537639.145, "dur": 0.084, + "args": { + "External id": 994919,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537639.664, "dur": 0.092, + "args": { + "External id": 994920,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537640.217, "dur": 0.098, + "args": { + "External id": 994921,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537640.781, "dur": 0.108, + "args": { + "External id": 994922,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537641.387, "dur": 0.085, + "args": { + "External id": 994923,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537641.941, "dur": 0.087, + "args": { + "External id": 994924,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537642.578, "dur": 0.086, + "args": { + "External id": 994925,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537643.130, "dur": 0.079, + "args": { + "External id": 994926,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537643.784, "dur": 0.083, + "args": { + "External id": 994927,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537644.355, "dur": 0.079, + "args": { + "External id": 994928,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537644.872, "dur": 0.078, + "args": { + "External id": 994929,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537645.400, "dur": 0.082, + "args": { + "External id": 994930,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537645.909, "dur": 0.083, + "args": { + "External id": 994931,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537646.497, "dur": 0.081, + "args": { + "External id": 994932,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537647.092, "dur": 0.088, + "args": { + "External id": 994933,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537647.683, "dur": 0.081, + "args": { + "External id": 994934,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537648.268, "dur": 0.082, + "args": { + "External id": 994935,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537648.867, "dur": 0.078, + "args": { + "External id": 994936,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537649.434, "dur": 0.078, + "args": { + "External id": 994937,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537649.977, "dur": 0.284, + "args": { + "External id": 994938,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537650.665, "dur": 0.323, + "args": { + "External id": 994939,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537651.302, "dur": 0.083, + "args": { + "External id": 994940,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537651.698, "dur": 0.074, + "args": { + "External id": 994941,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537652.468, "dur": 0.099, + "args": { + "External id": 994942,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537653.001, "dur": 0.064, + "args": { + "External id": 994943,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537653.611, "dur": 0.082, + "args": { + "External id": 994944,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537653.998, "dur": 0.062, + "args": { + "External id": 994945,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537654.508, "dur": 0.084, + "args": { + "External id": 994946,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537655.091, "dur": 0.085, + "args": { + "External id": 994947,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537655.585, "dur": 0.079, + "args": { + "External id": 994948,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537656.153, "dur": 0.085, + "args": { + "External id": 994949,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537656.750, "dur": 0.084, + "args": { + "External id": 994950,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537657.329, "dur": 0.081, + "args": { + "External id": 994951,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537658.121, "dur": 0.082, + "args": { + "External id": 994952,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537658.502, "dur": 0.064, + "args": { + "External id": 994953,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537659.130, "dur": 0.082, + "args": { + "External id": 994954,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537659.509, "dur": 0.063, + "args": { + "External id": 994955,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537660.039, "dur": 0.084, + "args": { + "External id": 994956,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537660.462, "dur": 0.063, + "args": { + "External id": 994957,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537661.008, "dur": 0.083, + "args": { + "External id": 994958,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537661.389, "dur": 0.066, + "args": { + "External id": 994959,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537661.842, "dur": 0.083, + "args": { + "External id": 994960,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537662.225, "dur": 0.066, + "args": { + "External id": 994961,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537662.765, "dur": 0.080, + "args": { + "External id": 994962,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537663.144, "dur": 0.066, + "args": { + "External id": 994963,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537663.616, "dur": 0.088, + "args": { + "External id": 994964,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537664.001, "dur": 0.276, + "args": { + "External id": 994965,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537665.002, "dur": 0.237, + "args": { + "External id": 994966,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537665.787, "dur": 0.068, + "args": { + "External id": 994967,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537666.264, "dur": 0.088, + "args": { + "External id": 994968,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537666.652, "dur": 0.206, + "args": { + "External id": 994969,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537667.334, "dur": 0.228, + "args": { + "External id": 994970,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537667.858, "dur": 0.156, + "args": { + "External id": 994971,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537668.478, "dur": 0.112, + "args": { + "External id": 994972,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537668.888, "dur": 0.098, + "args": { + "External id": 994973,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537669.416, "dur": 0.115, + "args": { + "External id": 994974,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537669.832, "dur": 0.064, + "args": { + "External id": 994975,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537670.333, "dur": 0.197, + "args": { + "External id": 994976,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537670.825, "dur": 0.060, + "args": { + "External id": 994977,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537671.469, "dur": 0.060, + "args": { + "External id": 994978,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537671.840, "dur": 0.058, + "args": { + "External id": 994979,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537672.382, "dur": 0.083, + "args": { + "External id": 994980,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537672.765, "dur": 0.060, + "args": { + "External id": 994981,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537673.262, "dur": 0.086, + "args": { + "External id": 994982,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537673.646, "dur": 0.066, + "args": { + "External id": 994983,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537674.154, "dur": 0.082, + "args": { + "External id": 994984,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537674.535, "dur": 0.064, + "args": { + "External id": 994985,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537674.938, "dur": 0.078, + "args": { + "External id": 994986,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537675.305, "dur": 0.066, + "args": { + "External id": 994987,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537675.835, "dur": 0.084, + "args": { + "External id": 994988,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537676.222, "dur": 0.077, + "args": { + "External id": 994989,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537676.983, "dur": 0.082, + "args": { + "External id": 994990,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537677.559, "dur": 0.079, + "args": { + "External id": 994991,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537678.162, "dur": 0.078, + "args": { + "External id": 994992,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537678.548, "dur": 0.084, + "args": { + "External id": 994993,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537679.141, "dur": 0.075, + "args": { + "External id": 994994,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537679.705, "dur": 0.081, + "args": { + "External id": 994995,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537680.172, "dur": 0.113, + "args": { + "External id": 994996,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537680.584, "dur": 0.062, + "args": { + "External id": 994997,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537680.993, "dur": 0.086, + "args": { + "External id": 994998,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537681.383, "dur": 0.063, + "args": { + "External id": 994999,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537681.805, "dur": 0.084, + "args": { + "External id": 995000,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537682.190, "dur": 0.064, + "args": { + "External id": 995001,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537682.772, "dur": 0.080, + "args": { + "External id": 995002,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537683.153, "dur": 0.069, + "args": { + "External id": 995003,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537683.684, "dur": 0.081, + "args": { + "External id": 995004,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537684.069, "dur": 0.066, + "args": { + "External id": 995005,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537684.625, "dur": 0.083, + "args": { + "External id": 995006,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537685.013, "dur": 0.067, + "args": { + "External id": 995007,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537685.503, "dur": 0.083, + "args": { + "External id": 995008,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537685.883, "dur": 0.062, + "args": { + "External id": 995009,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537686.407, "dur": 0.078, + "args": { + "External id": 995010,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537686.798, "dur": 0.067, + "args": { + "External id": 995011,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537687.313, "dur": 0.084, + "args": { + "External id": 995012,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537687.705, "dur": 0.067, + "args": { + "External id": 995013,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537688.437, "dur": 0.086, + "args": { + "External id": 995014,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537688.857, "dur": 0.067, + "args": { + "External id": 995015,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537689.397, "dur": 0.080, + "args": { + "External id": 995016,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537689.782, "dur": 0.062, + "args": { + "External id": 995017,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537690.490, "dur": 0.082, + "args": { + "External id": 995018,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537690.874, "dur": 0.064, + "args": { + "External id": 995019,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537691.378, "dur": 0.082, + "args": { + "External id": 995020,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537691.769, "dur": 0.061, + "args": { + "External id": 995021,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537692.515, "dur": 0.078, + "args": { + "External id": 995022,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537692.890, "dur": 0.065, + "args": { + "External id": 995023,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537693.657, "dur": 0.080, + "args": { + "External id": 995024,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537694.040, "dur": 0.070, + "args": { + "External id": 995025,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537694.484, "dur": 0.078, + "args": { + "External id": 995026,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537694.858, "dur": 0.220, + "args": { + "External id": 995027,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537695.540, "dur": 0.227, + "args": { + "External id": 995028,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537696.065, "dur": 0.212, + "args": { + "External id": 995029,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537696.722, "dur": 0.241, + "args": { + "External id": 995030,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537697.265, "dur": 0.067, + "args": { + "External id": 995031,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537697.925, "dur": 0.092, + "args": { + "External id": 995032,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537698.352, "dur": 0.065, + "args": { + "External id": 995033,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537698.942, "dur": 0.085, + "args": { + "External id": 995034,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537699.521, "dur": 0.104, + "args": { + "External id": 995035,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537700.135, "dur": 0.217, + "args": { + "External id": 995036,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537700.657, "dur": 0.087, + "args": { + "External id": 995037,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537701.201, "dur": 0.216, + "args": { + "External id": 995038,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537701.725, "dur": 0.060, + "args": { + "External id": 995039,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537702.342, "dur": 0.100, + "args": { + "External id": 995040,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537702.741, "dur": 0.065, + "args": { + "External id": 995041,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537703.283, "dur": 0.095, + "args": { + "External id": 995042,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537703.678, "dur": 0.094, + "args": { + "External id": 995043,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537704.452, "dur": 0.124, + "args": { + "External id": 995044,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537704.870, "dur": 0.113, + "args": { + "External id": 995045,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537705.608, "dur": 0.121, + "args": { + "External id": 995046,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537706.037, "dur": 0.061, + "args": { + "External id": 995047,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537706.592, "dur": 0.080, + "args": { + "External id": 995048,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537706.972, "dur": 0.064, + "args": { + "External id": 995049,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537707.472, "dur": 0.112, + "args": { + "External id": 995050,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537707.900, "dur": 0.063, + "args": { + "External id": 995051,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537708.378, "dur": 0.086, + "args": { + "External id": 995052,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537708.772, "dur": 0.067, + "args": { + "External id": 995053,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537709.276, "dur": 0.088, + "args": { + "External id": 995054,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537709.686, "dur": 0.066, + "args": { + "External id": 995055,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537710.286, "dur": 0.084, + "args": { + "External id": 995056,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537710.690, "dur": 0.064, + "args": { + "External id": 995057,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537711.262, "dur": 0.068, + "args": { + "External id": 995058,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537711.651, "dur": 0.067, + "args": { + "External id": 995059,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537712.211, "dur": 0.089, + "args": { + "External id": 995060,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537712.612, "dur": 0.065, + "args": { + "External id": 995061,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537713.148, "dur": 0.087, + "args": { + "External id": 995062,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537713.552, "dur": 0.059, + "args": { + "External id": 995063,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537714.061, "dur": 0.082, + "args": { + "External id": 995064,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537714.459, "dur": 0.066, + "args": { + "External id": 995065,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537714.999, "dur": 0.085, + "args": { + "External id": 995066,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537715.401, "dur": 0.062, + "args": { + "External id": 995067,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537716.027, "dur": 0.078, + "args": { + "External id": 995068,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537716.427, "dur": 0.057, + "args": { + "External id": 995069,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537717.037, "dur": 0.067, + "args": { + "External id": 995070,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537717.418, "dur": 0.064, + "args": { + "External id": 995071,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537717.861, "dur": 0.080, + "args": { + "External id": 995072,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537718.276, "dur": 0.062, + "args": { + "External id": 995073,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537718.984, "dur": 0.061, + "args": { + "External id": 995074,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537719.371, "dur": 0.068, + "args": { + "External id": 995075,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537719.952, "dur": 0.089, + "args": { + "External id": 995076,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537720.548, "dur": 0.084, + "args": { + "External id": 995077,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537721.363, "dur": 0.082, + "args": { + "External id": 995078,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537721.764, "dur": 0.060, + "args": { + "External id": 995079,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537722.325, "dur": 0.084, + "args": { + "External id": 995080,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537722.730, "dur": 0.065, + "args": { + "External id": 995081,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537723.244, "dur": 0.068, + "args": { + "External id": 995082,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537723.635, "dur": 0.060, + "args": { + "External id": 995083,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537724.233, "dur": 0.079, + "args": { + "External id": 995084,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537724.626, "dur": 0.063, + "args": { + "External id": 995085,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537725.182, "dur": 0.078, + "args": { + "External id": 995086,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537725.586, "dur": 0.061, + "args": { + "External id": 995087,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537726.162, "dur": 0.076, + "args": { + "External id": 995088,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537726.559, "dur": 0.065, + "args": { + "External id": 995089,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537727.107, "dur": 0.078, + "args": { + "External id": 995090,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537727.503, "dur": 0.062, + "args": { + "External id": 995091,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537728.163, "dur": 0.080, + "args": { + "External id": 995092,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537728.638, "dur": 0.066, + "args": { + "External id": 995093,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537729.382, "dur": 0.077, + "args": { + "External id": 995094,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537729.775, "dur": 0.061, + "args": { + "External id": 995095,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537730.224, "dur": 0.081, + "args": { + "External id": 995096,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537730.623, "dur": 0.075, + "args": { + "External id": 995097,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537731.207, "dur": 0.082, + "args": { + "External id": 995098,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537731.600, "dur": 0.075, + "args": { + "External id": 995099,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537732.245, "dur": 0.228, + "args": { + "External id": 995100,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537732.790, "dur": 0.075, + "args": { + "External id": 995101,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537733.328, "dur": 0.125, + "args": { + "External id": 995102,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537733.770, "dur": 0.062, + "args": { + "External id": 995103,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537734.265, "dur": 0.082, + "args": { + "External id": 995104,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537734.665, "dur": 0.076, + "args": { + "External id": 995105,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537735.212, "dur": 0.233, + "args": { + "External id": 995106,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537735.760, "dur": 0.065, + "args": { + "External id": 995107,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537736.492, "dur": 0.085, + "args": { + "External id": 995108,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537736.892, "dur": 0.061, + "args": { + "External id": 995109,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537737.324, "dur": 0.083, + "args": { + "External id": 995110,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537737.722, "dur": 0.064, + "args": { + "External id": 995111,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537738.284, "dur": 0.196, + "args": { + "External id": 995112,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537738.795, "dur": 0.063, + "args": { + "External id": 995113,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537739.279, "dur": 0.081, + "args": { + "External id": 995114,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537739.677, "dur": 0.064, + "args": { + "External id": 995115,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537740.357, "dur": 0.082, + "args": { + "External id": 995116,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537740.956, "dur": 0.081, + "args": { + "External id": 995117,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537741.672, "dur": 0.085, + "args": { + "External id": 995118,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537742.067, "dur": 0.077, + "args": { + "External id": 995119,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537742.828, "dur": 0.082, + "args": { + "External id": 995120,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537743.416, "dur": 0.083, + "args": { + "External id": 995121,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537744.112, "dur": 0.080, + "args": { + "External id": 995122,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537744.512, "dur": 0.072, + "args": { + "External id": 995123,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537745.039, "dur": 0.082, + "args": { + "External id": 995124,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537745.441, "dur": 0.061, + "args": { + "External id": 995125,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537746.154, "dur": 0.082, + "args": { + "External id": 995126,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537746.552, "dur": 0.063, + "args": { + "External id": 995127,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537747.132, "dur": 0.083, + "args": { + "External id": 995128,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537747.530, "dur": 0.061, + "args": { + "External id": 995129,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537748.123, "dur": 0.110, + "args": { + "External id": 995130,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537748.552, "dur": 0.067, + "args": { + "External id": 995131,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537749.262, "dur": 0.083, + "args": { + "External id": 995132,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537749.658, "dur": 0.070, + "args": { + "External id": 995133,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537750.138, "dur": 0.079, + "args": { + "External id": 995134,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537750.541, "dur": 0.064, + "args": { + "External id": 995135,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537751.037, "dur": 0.085, + "args": { + "External id": 995136,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537751.440, "dur": 0.067, + "args": { + "External id": 995137,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537752.122, "dur": 0.078, + "args": { + "External id": 995138,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537752.519, "dur": 0.066, + "args": { + "External id": 995139,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537753.090, "dur": 0.086, + "args": { + "External id": 995140,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537753.497, "dur": 0.064, + "args": { + "External id": 995141,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537754.006, "dur": 0.085, + "args": { + "External id": 995142,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537754.409, "dur": 0.063, + "args": { + "External id": 995143,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537755.188, "dur": 0.081, + "args": { + "External id": 995144,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537755.583, "dur": 0.066, + "args": { + "External id": 995145,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537756.118, "dur": 0.081, + "args": { + "External id": 995146,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537756.516, "dur": 0.065, + "args": { + "External id": 995147,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537757.016, "dur": 0.081, + "args": { + "External id": 995148,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537757.408, "dur": 0.064, + "args": { + "External id": 995149,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537758.005, "dur": 0.065, + "args": { + "External id": 995150,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537758.379, "dur": 0.066, + "args": { + "External id": 995151,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537758.924, "dur": 0.081, + "args": { + "External id": 995152,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537759.317, "dur": 0.067, + "args": { + "External id": 995153,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537759.832, "dur": 0.078, + "args": { + "External id": 995154,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537760.224, "dur": 0.065, + "args": { + "External id": 995155,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537760.810, "dur": 0.077, + "args": { + "External id": 995156,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537761.206, "dur": 0.064, + "args": { + "External id": 995157,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537761.726, "dur": 0.079, + "args": { + "External id": 995158,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537762.125, "dur": 0.065, + "args": { + "External id": 995159,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537762.707, "dur": 0.085, + "args": { + "External id": 995160,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537763.105, "dur": 0.085, + "args": { + "External id": 995161,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537763.846, "dur": 0.079, + "args": { + "External id": 995162,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537764.275, "dur": 0.070, + "args": { + "External id": 995163,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537764.859, "dur": 0.084, + "args": { + "External id": 995164,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537765.432, "dur": 0.078, + "args": { + "External id": 995165,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537766.056, "dur": 0.063, + "args": { + "External id": 995166,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537766.435, "dur": 0.063, + "args": { + "External id": 995167,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537766.952, "dur": 0.076, + "args": { + "External id": 995168,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537767.343, "dur": 0.064, + "args": { + "External id": 995169,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537767.752, "dur": 0.083, + "args": { + "External id": 995170,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537768.153, "dur": 0.067, + "args": { + "External id": 995171,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537768.731, "dur": 0.232, + "args": { + "External id": 995172,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537769.272, "dur": 0.093, + "args": { + "External id": 995173,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2338708, "tid": 2338708, + "ts": 6345942537770.026, "dur": 0.107, + "args": { + "External id": 995174,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 21930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2338708, "tid": 2338708, + "ts": 6345942538454.254, "dur": 4149.810, + "args": { + "External id": 995175,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "9.3476373648457231e-06", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 21931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2338708, "tid": 2338708, + "ts": 6345942541296.415, "dur": 1084.504, + "args": { + "External id": 995176,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "9.3476373648457231e-06", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 21932 + } + }, + { + "name": "process_name", "ph": "M", "ts": 6345936068085.116, "pid": 2338708, "tid": 0, + "args": { + "name": "python3.12" + } + }, + { + "name": "process_labels", "ph": "M", "ts": 6345936068085.116, "pid": 2338708, "tid": 0, + "args": { + "labels": "CPU" + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 6345936068085.116, "pid": 2338708, "tid": 0, + "args": { + "sort_index": 2338708 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6345936068085.116, "pid": 2338708, "tid": 2379421, + "args": { + "name": "thread 2379421 (pt_autograd_2)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6345936068085.116, "pid": 2338708, "tid": 2379421, + "args": { + "sort_index": 2379421 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6345936068085.116, "pid": 2338708, "tid": 2379421, + "args": { + "name": "thread 2379421 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6345936068085.116, "pid": 2338708, "tid": 2379421, + "args": { + "sort_index": 2379421 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 6345936068085.116, "pid": 2338708, "tid": 2338708, + "args": { + "name": "thread 2338708 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 6345936068085.116, "pid": 2338708, "tid": 2338708, + "args": { + "sort_index": 2338708 + } + }, + { + "ph": "X", "cat": "Trace", "ts": 6345936067959.438, "dur": 6481538.439, + "pid": "Spans", "tid": "PyTorch Profiler", + "name": "PyTorch Profiler (0)", + "args": { + "Op count": 0 + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 6345936067959.438, + "pid": "Spans", "tid": 0, + "args": { + "sort_index": 536870912 + } + }, + { + "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", + "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 6345936067959.438 + }, + { + "name": "Record Window End", "ph": "i", "s": "g", + "pid": "", "tid": "", "ts": 6345942656787.207 + } + ], + "traceName": "exp/mtp.code.7B.batch8.seqlen4096.context4096.warmup400.update2.steps40000.lr2e-5.cosine/profile_trace/iteration_22528/rank2_trace.json", + "displayTimeUnit": "ms", + "baseTimeNanoseconds": 1751410836000000000 +} \ No newline at end of file